From 93da221eaf7ab6ed8afa57f13e5155e6e2286337 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Mon, 21 Dec 2020 10:59:22 +0100 Subject: [PATCH 001/378] [VP][NFC] ISD::VP_Sub -> ISD::VP_SUB --- llvm/include/llvm/IR/VPIntrinsics.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 3073866da4c97..981548c6dde93 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -131,7 +131,7 @@ HELPER_REGISTER_BINARY_INT_VP(vp_shl, VP_SHL, Shl) HELPER_REGISTER_BINARY_INT_VP(vp_srem, VP_SREM, SRem) // llvm.vp.sub(x,y,mask,vlen) -HELPER_REGISTER_BINARY_INT_VP(vp_sub, VP_Sub, Sub) +HELPER_REGISTER_BINARY_INT_VP(vp_sub, VP_SUB, Sub) // llvm.vp.udiv(x,y,mask,vlen) HELPER_REGISTER_BINARY_INT_VP(vp_udiv, VP_UDIV, UDiv) From cd608dc8d3e975fa3c57327d2146b5c223bcf83a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 21 Dec 2020 09:14:57 +0000 Subject: [PATCH 002/378] [VPlan] Use VPDef for VPInterleaveRecipe. This patch turns updates VPInterleaveRecipe to manage the values it defines using VPDef. The VPValue is used during VPlan construction and codegeneration instead of the plain IR reference where possible. Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D90562 --- .../Transforms/Vectorize/LoopVectorize.cpp | 30 +++++++++++-------- .../Transforms/Vectorize/VPRecipeBuilder.h | 4 +-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 8 +++-- llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++++-- .../Transforms/Vectorize/VPlanTest.cpp | 4 ++- 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e486f7110295c..25deab6d2b359 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -569,6 +569,7 @@ class InnerLoopVectorizer { /// BlockInMask is non-null. Use \p State to translate given VPValues to IR /// values in the vectorized loop. void vectorizeInterleaveGroup(const InterleaveGroup *Group, + ArrayRef VPDefs, VPTransformState &State, VPValue *Addr, ArrayRef StoredValues, VPValue *BlockInMask = nullptr); @@ -2514,8 +2515,9 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) { // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void InnerLoopVectorizer::vectorizeInterleaveGroup( - const InterleaveGroup *Group, VPTransformState &State, - VPValue *Addr, ArrayRef StoredValues, VPValue *BlockInMask) { + const InterleaveGroup *Group, ArrayRef VPDefs, + VPTransformState &State, VPValue *Addr, ArrayRef StoredValues, + VPValue *BlockInMask) { Instruction *Instr = Group->getInsertPos(); const DataLayout &DL = Instr->getModule()->getDataLayout(); @@ -2617,6 +2619,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( // For each member in the group, shuffle out the appropriate data from the // wide loads. 
+ unsigned J = 0; for (unsigned I = 0; I < InterleaveFactor; ++I) { Instruction *Member = Group->getMember(I); @@ -2641,8 +2644,9 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( if (Group->isReverse()) StridedVec = reverseVector(StridedVec); - VectorLoopValueMap.setVectorValue(Member, Part, StridedVec); + State.set(VPDefs[J], Member, StridedVec, Part); } + ++J; } return; } @@ -7980,9 +7984,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { return BlockMaskCache[BB] = BlockMask; } -VPWidenMemoryInstructionRecipe * -VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, - VPlanPtr &Plan) { +VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan) { assert((isa(I) || isa(I)) && "Must be called with either a load or store"); @@ -8472,16 +8475,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( if (auto *SI = dyn_cast_or_null(IG->getMember(i))) StoredValues.push_back(Plan->getOrAddVPValue(SI->getOperand(0))); - (new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues, - Recipe->getMask())) - ->insertBefore(Recipe); - + auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues, + Recipe->getMask()); + VPIG->insertBefore(Recipe); + unsigned J = 0; for (unsigned i = 0; i < IG->getFactor(); ++i) if (Instruction *Member = IG->getMember(i)) { if (!Member->getType()->isVoidTy()) { VPValue *OriginalV = Plan->getVPValue(Member); Plan->removeVPValueFor(Member); - OriginalV->replaceAllUsesWith(Plan->getOrAddVPValue(Member)); + OriginalV->replaceAllUsesWith(VPIG->getVPValue(J)); + J++; } RecipeBuilder.getRecipe(Member)->eraseFromParent(); } @@ -8713,8 +8717,8 @@ void VPBlendRecipe::execute(VPTransformState &State) { void VPInterleaveRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Interleave group being replicated."); - State.ILV->vectorizeInterleaveGroup(IG, State, getAddr(), getStoredValues(), - getMask()); + State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(), + getStoredValues(), getMask()); } void VPReductionRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 6f055ca80ff29..41679637167d5 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -61,8 +61,8 @@ class VPRecipeBuilder { /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPWidenMemoryInstructionRecipe * - tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan); + VPRecipeBase *tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan); /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 516a07998854b..e58b49a64737e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -384,8 +384,12 @@ void VPBasicBlock::execute(VPTransformState *State) { void VPBasicBlock::dropAllReferences(VPValue *NewValue) { for (VPRecipeBase &R : Recipes) { - if (auto *VPV = R.toVPValue()) - VPV->replaceAllUsesWith(NewValue); + if (VPValue *Def = R.toVPValue()) + Def->replaceAllUsesWith(NewValue); + else if (auto *IR = dyn_cast(&R)) { + for (auto *Def : IR->definedValues()) + Def->replaceAllUsesWith(NewValue); + } if (auto *User = R.toVPUser()) for (unsigned I = 0, E = User->getNumOperands(); I != E; I++) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 02ae60990c632..37f1e9e73c397 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1047,7 +1047,7 @@ class VPBlendRecipe : public VPRecipeBase, public VPUser { /// or stores into one wide load/store and shuffles. The first operand of a /// VPInterleave recipe is the address, followed by the stored values, followed /// by an optional mask. -class VPInterleaveRecipe : public VPRecipeBase, public VPUser { +class VPInterleaveRecipe : public VPRecipeBase, public VPDef, public VPUser { const InterleaveGroup *IG; bool HasMask = false; @@ -1055,7 +1055,14 @@ class VPInterleaveRecipe : public VPRecipeBase, public VPUser { public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, ArrayRef StoredValues, VPValue *Mask) - : VPRecipeBase(VPInterleaveSC), VPUser({Addr}), IG(IG) { + : VPRecipeBase(VPInterleaveSC), VPUser(Addr), IG(IG) { + for (unsigned i = 0; i < IG->getFactor(); ++i) + if (Instruction *I = IG->getMember(i)) { + if (I->getType()->isVoidTy()) + continue; + new VPValue(I, this); + } + for (auto *SV : StoredValues) addOperand(SV); if (Mask) { diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index f89abc8769b41..4864d52ad01e8 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "../lib/Transforms/Vectorize/VPlan.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "gtest/gtest.h" @@ -470,7 +471,8 @@ TEST(VPRecipeTest, CastVPInterleaveRecipeToVPUser) { VPValue Addr; VPValue Mask; - VPInterleaveRecipe Recipe(nullptr, &Addr, {}, &Mask); + InterleaveGroup IG(4, false, Align(4)); + VPInterleaveRecipe Recipe(&IG, &Addr, {}, &Mask); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); From d99e4a4840d833c6e381c2ab76b15451dffb56b2 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Tue, 15 Dec 2020 23:22:39 +0900 Subject: [PATCH 003/378] [VE] Support RETURNADDR Implement RETURNADDR for VE. Add a regression test also. 
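For context, llvm.returnaddress is what clang emits for the GCC-style
__builtin_return_address builtin, so a minimal C sketch of the kind of source
this lets the VE backend handle is (function name arbitrary; the added
returnaddr.ll test below exercises depths 0, 1 and 2 directly in IR):

    void *current_return_address(void) {
      /* Clang lowers this builtin to the @llvm.returnaddress(i32 0)
         intrinsic, which this patch teaches VE to lower as a load at a
         fixed offset from the frame address. */
      return __builtin_return_address(0);
    }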
Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D93545 --- llvm/lib/Target/VE/VEISelLowering.cpp | 22 ++++++ llvm/test/CodeGen/VE/Scalar/returnaddr.ll | 91 +++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 llvm/test/CodeGen/VE/Scalar/returnaddr.ll diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 408c28205aa2f..da5b6422f53d6 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -1510,6 +1510,26 @@ static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, return FrameAddr; } +static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget); + + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Offset = DAG.getConstant(8, DL, VT); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), + MachinePointerInfo()); +} + static SDValue getSplatValue(SDNode *N) { if (auto *BuildVec = dyn_cast(N)) { return BuildVec->getSplatValue(); @@ -1560,6 +1580,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerJumpTable(Op, DAG); case ISD::LOAD: return lowerLOAD(Op, DAG); + case ISD::RETURNADDR: + return lowerRETURNADDR(Op, DAG, *this, Subtarget); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); case ISD::STORE: diff --git a/llvm/test/CodeGen/VE/Scalar/returnaddr.ll b/llvm/test/CodeGen/VE/Scalar/returnaddr.ll new file mode 100644 index 0000000000000..ea1b5f687c9da --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/returnaddr.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=ve-- | FileCheck %s + +define i8* @h() nounwind readnone optsize { +; CHECK-LABEL: h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -176(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: ld %s0, (, %s9) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s0, 8(, %s0) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +entry: + %ret = tail call i8* @llvm.returnaddress(i32 2) + ret i8* %ret +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone + +define i8* @g() nounwind readnone optsize { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -176(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; 
CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: ld %s0, (, %s9) +; CHECK-NEXT: ld %s0, 8(, %s0) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +entry: + %ret = tail call i8* @llvm.returnaddress(i32 1) + ret i8* %ret +} + +define i8* @f() nounwind readnone optsize { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -176(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT: ld %s0, 8(, %s9) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +entry: + %ret = tail call i8* @llvm.returnaddress(i32 0) + ret i8* %ret +} From 5e273b845bc4411c23f5da0ebbf5d4dfd6b91f13 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Fri, 18 Dec 2020 23:21:10 +0900 Subject: [PATCH 004/378] [VE] Support STACKSAVE and STACKRESTORE Change to use default expanded code. Add regression tests also. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D93539 --- llvm/lib/Target/VE/VEISelLowering.cpp | 4 ++++ llvm/test/CodeGen/VE/Scalar/stacksave.ll | 26 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 llvm/test/CodeGen/VE/Scalar/stacksave.ll diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index da5b6422f53d6..ea9281a005021 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -135,6 +135,10 @@ void VETargetLowering::initSPUActions() { /// Stack { setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + + // Use the default implementation. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); /// } Stack /// Branch { diff --git a/llvm/test/CodeGen/VE/Scalar/stacksave.ll b/llvm/test/CodeGen/VE/Scalar/stacksave.ll new file mode 100644 index 0000000000000..336f9b83455fd --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/stacksave.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=ve | FileCheck %s + +; Function Attrs: noinline nounwind optnone +define i8* @stacksave() { +; CHECK-LABEL: stacksave: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 0, %s11 +; CHECK-NEXT: or %s11, 0, %s9 + %ret = call i8* @llvm.stacksave() + ret i8* %ret +} + +; Function Attrs: noinline nounwind optnone +define void @stackrestore(i8* %ptr) { +; CHECK-LABEL: stackrestore: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + call void @llvm.stackrestore(i8* %ptr) + ret void +} + +; Function Attrs: nounwind +declare i8* @llvm.stacksave() +; Function Attrs: nounwind +declare void @llvm.stackrestore(i8*) From d6abd7317a269dc7d0204edb8e98f8fcc1a18a2f Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Mon, 21 Dec 2020 11:09:06 +0000 Subject: [PATCH 005/378] [flang][driver] Make the names of files created in unit tests unique (nfc) Using files with identical names leads to unexpected failures when tests are run in parallel. 
This is tricky to reproduce, but has been happening on some buildbots since merging https://reviews.llvm.org/D92854. In that patch I added a unit test with a non-unique test file. This patch fixes that. --- flang/unittests/Frontend/FrontendActionTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/unittests/Frontend/FrontendActionTest.cpp b/flang/unittests/Frontend/FrontendActionTest.cpp index 78161f691effc..b49b7312525a2 100644 --- a/flang/unittests/Frontend/FrontendActionTest.cpp +++ b/flang/unittests/Frontend/FrontendActionTest.cpp @@ -18,7 +18,7 @@ using namespace Fortran::frontend; namespace { TEST(FrontendAction, PrintPreprocessedInput) { - std::string inputFile = "test-file.f"; + std::string inputFile = "pp-test-file.f"; std::error_code ec; // 1. Create the input file for the file manager @@ -78,7 +78,7 @@ TEST(FrontendAction, PrintPreprocessedInput) { } TEST(FrontendAction, ParseSyntaxOnly) { - std::string inputFile = "test-file.f"; + std::string inputFile = "syntax-only-test-file.f"; std::error_code ec; // 1. Create the input file for the file manager From 06b83fd6c75b48a6a93dc580fb4409e13608a045 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 18 Dec 2020 10:52:53 +0100 Subject: [PATCH 006/378] [TableGen] NFC: Switch to range-based for loops in OptParserEmitter This simplifies the code a bit. No functionality change. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D93526 --- llvm/utils/TableGen/OptParserEmitter.cpp | 29 ++++++++---------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp index b3fe9d7a91d16..794485256d505 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptParserEmitter.cpp @@ -219,8 +219,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { PrefixesT Prefixes; Prefixes.insert(std::make_pair(PrefixKeyT(), "prefix_0")); unsigned CurPrefix = 0; - for (unsigned i = 0, e = Opts.size(); i != e; ++i) { - const Record &R = *Opts[i]; + for (const Record &R : llvm::make_pointee_range(Opts)) { std::vector prf = R.getValueAsListOfStrings("Prefixes"); PrefixKeyT prfkey(prf.begin(), prf.end()); unsigned NewPrefix = CurPrefix + 1; @@ -235,19 +234,16 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { OS << "// Prefixes\n\n"; OS << "#ifdef PREFIX\n"; OS << "#define COMMA ,\n"; - for (PrefixesT::const_iterator I = Prefixes.begin(), E = Prefixes.end(); - I != E; ++I) { + for (const auto &Prefix : Prefixes) { OS << "PREFIX("; // Prefix name. - OS << I->second; + OS << Prefix.second; // Prefix values. OS << ", {"; - for (PrefixKeyT::const_iterator PI = I->first.begin(), - PE = I->first.end(); PI != PE; ++PI) { - OS << "\"" << *PI << "\" COMMA "; - } + for (StringRef PrefixKey : Prefix.first) + OS << "\"" << PrefixKey << "\" COMMA "; OS << "nullptr})\n"; } OS << "#undef COMMA\n"; @@ -256,9 +252,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { OS << "/////////\n"; OS << "// Groups\n\n"; OS << "#ifdef OPTION\n"; - for (unsigned i = 0, e = Groups.size(); i != e; ++i) { - const Record &R = *Groups[i]; - + for (const Record &R : llvm::make_pointee_range(Groups)) { // Start a single option entry. 
OS << "OPTION("; @@ -343,8 +337,8 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { OS << "nullptr"; } else { OS << "\""; - for (size_t i = 0, e = AliasArgs.size(); i != e; ++i) - OS << AliasArgs[i] << "\\0"; + for (StringRef AliasArg : AliasArgs) + OS << AliasArg << "\\0"; OS << "\""; } @@ -394,9 +388,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { }; std::vector OptsWithMarshalling; - for (unsigned I = 0, E = Opts.size(); I != E; ++I) { - const Record &R = *Opts[I]; - + for (const Record &R : llvm::make_pointee_range(Opts)) { // Start a single option entry. OS << "OPTION("; WriteOptRecordFields(OS, R); @@ -462,8 +454,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { OS << "#ifdef OPTTABLE_ARG_INIT\n"; OS << "//////////\n"; OS << "// Option Values\n\n"; - for (unsigned I = 0, E = Opts.size(); I != E; ++I) { - const Record &R = *Opts[I]; + for (const Record &R : llvm::make_pointee_range(Opts)) { if (isa(R.getValueInit("ValuesCode"))) continue; OS << "{\n"; From 164bcbd40e6d10cd8a01477e2e9029b955fea93b Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 18 Dec 2020 11:18:12 +0100 Subject: [PATCH 007/378] [TableGen] NFC: Rename variables in OptParserEmitter Switch to the LLVM naming convention. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D93527 --- llvm/utils/TableGen/OptParserEmitter.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp index 794485256d505..a08a837e5e70c 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptParserEmitter.cpp @@ -220,11 +220,11 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { Prefixes.insert(std::make_pair(PrefixKeyT(), "prefix_0")); unsigned CurPrefix = 0; for (const Record &R : llvm::make_pointee_range(Opts)) { - std::vector prf = R.getValueAsListOfStrings("Prefixes"); - PrefixKeyT prfkey(prf.begin(), prf.end()); + std::vector RPrefixes = R.getValueAsListOfStrings("Prefixes"); + PrefixKeyT PrefixKey(RPrefixes.begin(), RPrefixes.end()); unsigned NewPrefix = CurPrefix + 1; - if (Prefixes.insert(std::make_pair(prfkey, (Twine("prefix_") + - Twine(NewPrefix)).str())).second) + std::string Prefix = (Twine("prefix_") + Twine(NewPrefix)).str(); + if (Prefixes.insert(std::make_pair(PrefixKey, Prefix)).second) CurPrefix = NewPrefix; } @@ -299,8 +299,8 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { auto WriteOptRecordFields = [&](raw_ostream &OS, const Record &R) { // The option prefix; - std::vector prf = R.getValueAsListOfStrings("Prefixes"); - OS << Prefixes[PrefixKeyT(prf.begin(), prf.end())] << ", "; + std::vector RPrefixes = R.getValueAsListOfStrings("Prefixes"); + OS << Prefixes[PrefixKeyT(RPrefixes.begin(), RPrefixes.end())] << ", "; // The option string. emitNameUsingSpelling(OS, R); From a3a896d1cdc0fd2f87de4787120eaac08e69eb5f Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Fri, 27 Nov 2020 22:51:11 +0900 Subject: [PATCH 008/378] [VE] Optimize LEA combinations Change to optimize references of elements of aggregate data. Also add regression tests. 
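The pattern being folded is the address of a member at a small constant offset
inside a global aggregate; a rough C sketch of the lea_offset case from the new
test is below (field names are illustrative, the IR only fixes the layout
{ i64, [1 x i8] }):

    struct buffer {
      long size;     /* offset 0 */
      char data[1];  /* offset 8 */
    };
    static struct buffer buf;

    char *lea_offset(void) {
      /* &buf.data[0] is the global's address plus a small immediate (8 here);
         with this change the immediate can be folded into the final lea.sl,
         as the non-PIC CHECK lines in lea-opt.ll show. */
      return buf.data;
    }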
Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D93627 --- llvm/lib/Target/VE/VEInstrInfo.td | 4 ++ llvm/test/CodeGen/VE/Scalar/lea-opt.ll | 63 ++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 llvm/test/CodeGen/VE/Scalar/lea-opt.ll diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index fce3bf06b9d3f..debd00ff6f96d 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -1608,6 +1608,8 @@ def vehi_only : OutPatFrag<(ops node:$hi), (LEASLzii 0, 0, $hi)>; def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo), (LEASLrii $lo, 0, $hi)>; +def vehi_lo_imm : OutPatFrag<(ops node:$hi, node:$lo, node:$idx), + (LEASLrii $lo, $idx, $hi)>; def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo), (LEASLrri $base, $lo, $hi)>; foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr", @@ -1615,6 +1617,8 @@ foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr", def : Pat<(VElo !cast(type):$lo), (velo_only $lo)>; def : Pat<(VEhi !cast(type):$hi), (vehi_only $hi)>; def : Pat<(add (VEhi !cast(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>; + def : Pat<(add (add (VEhi !cast(type):$hi), I64:$lo), simm7:$val), + (vehi_lo_imm $hi, $lo, (LO7 $val))>; def : Pat<(add I64:$base, (add (VEhi !cast(type):$hi), I64:$lo)), (vehi_baselo $base, $hi, $lo)>; } diff --git a/llvm/test/CodeGen/VE/Scalar/lea-opt.ll b/llvm/test/CodeGen/VE/Scalar/lea-opt.ll new file mode 100644 index 0000000000000..356b27653f4fe --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/lea-opt.ll @@ -0,0 +1,63 @@ +; RUN: llc < %s -mtriple=ve | FileCheck %s +; RUN: llc < %s -mtriple=ve -relocation-model=pic \ +; RUN: | FileCheck %s --check-prefix=PIC + +;;; Tests for lea instruction and its optimizations + +%struct.buffer = type { i64, [1 x i8] } + +@data = internal global i8 0, align 1 +@buf = internal global %struct.buffer zeroinitializer, align 8 + +; Function Attrs: norecurse nounwind readnone +define nonnull i8* @lea_basic() { +; CHECK-LABEL: lea_basic: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, data@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, data@hi(, %s0) +; CHECK-NEXT: b.l.t (, %s10) +; +; PIC-LABEL: lea_basic: +; PIC: # %bb.0: +; PIC-NEXT: st %s15, 24(, %s11) +; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; PIC-NEXT: and %s15, %s15, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, data@gotoff_lo +; PIC-NEXT: and %s0, %s0, (32)0 +; PIC-NEXT: lea.sl %s0, data@gotoff_hi(%s0, %s15) +; PIC-NEXT: ld %s16, 32(, %s11) +; PIC-NEXT: ld %s15, 24(, %s11) +; PIC-NEXT: b.l.t (, %s10) + ret i8* @data +} + +; Function Attrs: norecurse nounwind readnone +define i8* @lea_offset() { +; CHECK-LABEL: lea_offset: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, buf@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, buf@hi(8, %s0) +; CHECK-NEXT: b.l.t (, %s10) +; +; PIC-LABEL: lea_offset: +; PIC: # %bb.0: +; PIC-NEXT: st %s15, 24(, %s11) +; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; PIC-NEXT: and %s15, %s15, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, buf@gotoff_lo +; PIC-NEXT: and %s0, %s0, (32)0 +; PIC-NEXT: lea.sl %s0, buf@gotoff_hi(, %s0) +; PIC-NEXT: lea %s0, 8(%s0, %s15) +; PIC-NEXT: ld %s16, 32(, %s11) +; PIC-NEXT: ld %s15, 24(, %s11) +; PIC-NEXT: b.l.t 
(, %s10) + ret i8* getelementptr inbounds (%struct.buffer, %struct.buffer* @buf, i64 0, i32 1, i64 0) +} From b2ba6867eac10874bd279c739639bdb9e60c1996 Mon Sep 17 00:00:00 2001 From: Yafei Liu Date: Mon, 21 Dec 2020 08:22:56 -0500 Subject: [PATCH 009/378] Refactoring the attribute plugin example to fit the new API Make the example compile and the test case pass. --- clang/examples/Attribute/Attribute.cpp | 49 ++++++++++++++++-------- clang/test/Frontend/plugin-attribute.cpp | 39 +++++++++---------- 2 files changed, 51 insertions(+), 37 deletions(-) diff --git a/clang/examples/Attribute/Attribute.cpp b/clang/examples/Attribute/Attribute.cpp index 998f175dae546..159b09e4b154d 100644 --- a/clang/examples/Attribute/Attribute.cpp +++ b/clang/examples/Attribute/Attribute.cpp @@ -23,9 +23,10 @@ namespace { struct ExampleAttrInfo : public ParsedAttrInfo { ExampleAttrInfo() { - // Can take an optional string argument (the check that the argument - // actually is a string happens in handleDeclAttribute). - OptArgs = 1; + // Can take up to 15 optional arguments, to emulate accepting a variadic + // number of arguments. This just illustrates how many arguments a + // `ParsedAttrInfo` can hold, we will not use that much in this example. + OptArgs = 15; // GNU-style __attribute__(("example")) and C++-style [[example]] and // [[plugin::example]] supported. static constexpr Spelling S[] = {{ParsedAttr::AS_GNU, "example"}, @@ -39,7 +40,7 @@ struct ExampleAttrInfo : public ParsedAttrInfo { // This attribute appertains to functions only. if (!isa(D)) { S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type_str) - << Attr << "functions"; + << Attr << "functions"; return false; } return true; @@ -55,23 +56,39 @@ struct ExampleAttrInfo : public ParsedAttrInfo { S.Diag(Attr.getLoc(), ID); return AttributeNotApplied; } - // Check if we have an optional string argument. - StringRef Str = ""; + // We make some rules here: + // 1. Only accept at most 3 arguments here. + // 2. The first argument must be a string literal if it exists. + if (Attr.getNumArgs() > 3) { + unsigned ID = S.getDiagnostics().getCustomDiagID( + DiagnosticsEngine::Error, + "'example' attribute only accepts at most three arguments"); + S.Diag(Attr.getLoc(), ID); + return AttributeNotApplied; + } + // If there are arguments, the first argument should be a string literal. if (Attr.getNumArgs() > 0) { - Expr *ArgExpr = Attr.getArgAsExpr(0); + auto *Arg0 = Attr.getArgAsExpr(0); StringLiteral *Literal = - dyn_cast(ArgExpr->IgnoreParenCasts()); - if (Literal) { - Str = Literal->getString(); - } else { - S.Diag(ArgExpr->getExprLoc(), diag::err_attribute_argument_type) - << Attr.getAttrName() << AANT_ArgumentString; + dyn_cast(Arg0->IgnoreParenCasts()); + if (!Literal) { + unsigned ID = S.getDiagnostics().getCustomDiagID( + DiagnosticsEngine::Error, "first argument to the 'example' " + "attribute must be a string literal"); + S.Diag(Attr.getLoc(), ID); return AttributeNotApplied; } + SmallVector ArgsBuf; + for (unsigned i = 0; i < Attr.getNumArgs(); i++) { + ArgsBuf.push_back(Attr.getArgAsExpr(i)); + } + D->addAttr(AnnotateAttr::Create(S.Context, "example", ArgsBuf.data(), + ArgsBuf.size(), Attr.getRange())); + } else { + // Attach an annotate attribute to the Decl. + D->addAttr(AnnotateAttr::Create(S.Context, "example", nullptr, 0, + Attr.getRange())); } - // Attach an annotate attribute to the Decl. 
- D->addAttr(AnnotateAttr::Create(S.Context, "example(" + Str.str() + ")", - Attr.getRange())); return AttributeApplied; } }; diff --git a/clang/test/Frontend/plugin-attribute.cpp b/clang/test/Frontend/plugin-attribute.cpp index 571ede3dc0b11..969105927be52 100644 --- a/clang/test/Frontend/plugin-attribute.cpp +++ b/clang/test/Frontend/plugin-attribute.cpp @@ -1,25 +1,22 @@ -// RUN: %clang -fplugin=%llvmshlibdir/Attribute%pluginext -emit-llvm -S %s -o - 2>&1 | FileCheck %s --check-prefix=ATTRIBUTE -// RUN: not %clang -fplugin=%llvmshlibdir/Attribute%pluginext -emit-llvm -DBAD_ATTRIBUTE -S %s -o - 2>&1 | FileCheck %s --check-prefix=BADATTRIBUTE +// RUN: split-file %s %t +// RUN: %clang -cc1 -load %llvmshlibdir/Attribute%pluginext -fsyntax-only -ast-dump -verify %t/good_attr.cpp | FileCheck %s +// RUN: %clang -fplugin=%llvmshlibdir/Attribute%pluginext -fsyntax-only -Xclang -verify %t/bad_attr.cpp // REQUIRES: plugins, examples +//--- good_attr.cpp +// expected-no-diagnostics +void fn1a() __attribute__((example)) {} +[[example]] void fn1b() {} +[[plugin::example]] void fn1c() {} +void fn2() __attribute__((example("somestring", 1, 2.0))) {} +// CHECK-COUNT-4: -AnnotateAttr 0x{{[0-9a-z]+}} {{}} "example" +// CHECK: -StringLiteral 0x{{[0-9a-z]+}} {{}} 'const char [{{[0-9]+}}]' lvalue "somestring" +// CHECK: -IntegerLiteral 0x{{[0-9a-z]+}} {{}} 'int' 1 +// CHECK: -FloatingLiteral 0x{{[0-9a-z]+}} {{}} 'double' 2.000000e+00 -void fn1a() __attribute__((example)) { } -[[example]] void fn1b() { } -[[plugin::example]] void fn1c() { } -void fn2() __attribute__((example("somestring"))) { } -// ATTRIBUTE: warning: 'example' attribute only applies to functions -int var1 __attribute__((example("otherstring"))) = 1; - -// ATTRIBUTE: [[STR1_VAR:@.+]] = private unnamed_addr constant [10 x i8] c"example()\00" -// ATTRIBUTE: [[STR2_VAR:@.+]] = private unnamed_addr constant [20 x i8] c"example(somestring)\00" -// ATTRIBUTE: @llvm.global.annotations = {{.*}}@{{.*}}fn1a{{.*}}[[STR1_VAR]]{{.*}}@{{.*}}fn1b{{.*}}[[STR1_VAR]]{{.*}}@{{.*}}fn1c{{.*}}[[STR1_VAR]]{{.*}}@{{.*}}fn2{{.*}}[[STR2_VAR]] - -#ifdef BAD_ATTRIBUTE +//--- bad_attr.cpp +int var1 __attribute__((example("otherstring"))) = 1; // expected-warning {{'example' attribute only applies to functions}} class Example { - // BADATTRIBUTE: error: 'example' attribute only allowed at file scope - void __attribute__((example)) fn3(); + void __attribute__((example)) fn3(); // expected-error {{'example' attribute only allowed at file scope}} }; -// BADATTRIBUTE: error: 'example' attribute requires a string -void fn4() __attribute__((example(123))) { } -// BADATTRIBUTE: error: 'example' attribute takes no more than 1 argument -void fn5() __attribute__((example("a","b"))) { } -#endif +void fn4() __attribute__((example(123))) { } // expected-error {{'example's first argument should be a string literal}} +void fn5() __attribute__((example("a","b", 3, 4.0))) { } // expected-error {{'example' attribute only allowed at most three arguments}} From 6f45049fb6e5c6d573ef5bae338da822f6cbaa53 Mon Sep 17 00:00:00 2001 From: Denis Antrushin Date: Thu, 17 Dec 2020 16:36:57 +0700 Subject: [PATCH 010/378] [Statepoints] Disable VReg lowering for values used on exception path of invoke. Currently we lower invokes the same way as usual calls, e.g.: V1 = STATEPOINT ... V (tied-def 0) But this is incorrect is V1 is used on exceptional path. By LLVM rules V1 neither dominates its uses in landing pad, nor its live range is live on entry to landing pad. 
So compiler is allowed to do various weird transformations like splitting live range after statepoint and use split LR in catch block. Until (and if) we find better solution to this problem, let's use old lowering (spilling) for those values which are used on exceptional path and allow VReg lowering for values used only on normal path. Differential Revision: https://reviews.llvm.org/D93449 --- .../SelectionDAG/StatepointLowering.cpp | 24 +++++++++- .../CodeGen/X86/statepoint-vreg-invoke.ll | 45 ++++++++++--------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 2d2eb252e4e28..65ad5b0b5d8f2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -546,6 +546,18 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, // Decide which deriver pointers will go on VRegs unsigned MaxVRegPtrs = MaxRegistersForGCPointers.getValue(); + // Pointers used on exceptional path of invoke statepoint. + // We cannot assing them to VRegs. + SmallSet LPadPointers; + if (auto *StInvoke = dyn_cast_or_null(SI.StatepointInstr)) { + LandingPadInst *LPI = StInvoke->getLandingPadInst(); + for (auto *Relocate : SI.GCRelocates) + if (Relocate->getOperand(0) == LPI) { + LPadPointers.insert(Builder.getValue(Relocate->getBasePtr())); + LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr())); + } + } + LLVM_DEBUG(dbgs() << "Deciding how to lower GC Pointers:\n"); // List of unique lowered GC Pointer values. @@ -555,6 +567,14 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, unsigned CurNumVRegs = 0; + auto canPassGCPtrOnVReg = [&](SDValue SD) { + if (SD.getValueType().isVector()) + return false; + if (LPadPointers.count(SD)) + return false; + return !willLowerDirectly(SD); + }; + auto processGCPtr = [&](const Value *V) { SDValue PtrSD = Builder.getValue(V); if (!LoweredGCPtrs.insert(PtrSD)) @@ -564,7 +584,9 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, assert(!LowerAsVReg.count(PtrSD) && "must not have been seen"); if (LowerAsVReg.size() == MaxVRegPtrs) return; - if (willLowerDirectly(PtrSD) || V->getType()->isVectorTy()) { + assert(V->getType()->isVectorTy() == PtrSD.getValueType().isVector() && + "IR and SD types disagree"); + if (!canPassGCPtrOnVReg(PtrSD)) { LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG)); return; } diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll index b734dca622ae0..7c5a734acd6af 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll @@ -10,14 +10,16 @@ declare dso_local i32* @personality_function() define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) ; CHECK-LABEL: name: test_basic_invoke ; CHECK: bb.0.entry: -; CHECK: renamable $r14, renamable $rbx = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 5, 2, 0, 2, -1, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $r14(tied-def 0), killed renamable $rbx(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, renamable $rdi :: (store 8 into %stack.1) +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %stack.0) +; CHECK: STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 5, 2, 0, 2, -1, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 2, 0, 0, 
1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1) ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.safepoint_normal_dest: +; CHECK: renamable $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) ; CHECK: bb.2.normal_return: -; CHECK: $rax = COPY killed renamable $rbx ; CHECK: RET 0, $rax ; CHECK: bb.3.exceptional_return (landing-pad): -; CHECK: $rax = COPY killed renamable $r14 +; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: RET 0, $rax gc "statepoint-example" personality i32* ()* @"personality_function" { entry: @@ -43,35 +45,37 @@ exceptional_return: define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) ; CHECK-LABEL: name: test_invoke_same_val ; CHECK: bb.0.entry: -; CHECK: renamable $r15 = COPY $rcx -; CHECK: renamable $rbx = COPY $rdx -; CHECK: renamable $rbp = COPY $rsi +; CHECK: renamable $rbx = COPY $rcx +; CHECK: renamable $rbp = COPY $rdx ; CHECK: renamable $r14d = COPY $edi ; CHECK: TEST8ri renamable $r14b, 1, implicit-def $eflags ; CHECK: JCC_1 %bb.3, 4, implicit killed $eflags ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.left: -; CHECK: $rdi = COPY renamable $rbp -; CHECK: renamable $rbx, renamable $rbp = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbx(tied-def 0), killed renamable $rbp(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rsi :: (store 8 into %stack.0) +; CHECK: $rdi = COPY killed renamable $rsi +; CHECK: renamable $rbp = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbp(tied-def 0), 1, 8, %stack.0, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) ; CHECK: JMP_1 %bb.2 ; CHECK: bb.2.left.relocs: +; CHECK: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: JMP_1 %bb.5 ; CHECK: bb.3.right: -; CHECK: $rdi = COPY killed renamable $rbp -; CHECK: renamable $r15, renamable $rbx = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $r15(tied-def 0), killed renamable $rbx(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rbp :: (store 8 into %stack.0) +; CHECK: $rdi = COPY killed renamable $rsi +; CHECK: renamable $rbx = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbx(tied-def 0), 1, 8, %stack.0, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) ; CHECK: JMP_1 %bb.4 ; CHECK: bb.4.right.relocs: -; CHECK: renamable $rbp = COPY killed renamable $r15 +; CHECK: renamable $rbp = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: bb.5.normal_return: ; CHECK: TEST8ri renamable $r14b, 1, implicit-def $eflags, implicit killed $r14d -; CHECK: renamable $rbp = CMOV64rr killed renamable $rbp, killed renamable $rbx, 4, implicit killed $eflags -; CHECK: $rax = COPY killed renamable $rbp +; CHECK: renamable $rbx = CMOV64rr killed renamable $rbx, killed renamable $rbp, 4, implicit killed $eflags +; CHECK: $rax = COPY killed renamable $rbx ; CHECK: RET 0, $rax ; CHECK: bb.6.exceptional_return.left (landing-pad): -; CHECK: $rax = COPY killed renamable $rbp +; CHECK: renamable $rax = MOV64rm 
%stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: RET 0, $rax ; CHECK: bb.7.exceptional_return.right (landing-pad): -; CHECK: $rax = COPY killed renamable $rbx +; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: RET 0, $rax gc "statepoint-example" personality i32* ()* @"personality_function" { entry: @@ -117,17 +121,16 @@ exceptional_return.right: define void @test_duplicate_ir_values() gc "statepoint-example" personality i32* ()* @personality_function { ; CHECK-LABEL: name: test_duplicate_ir_values ; CHECK: bb.0.entry: -; CHECK: renamable $rbx = MOV64rm undef renamable $rax, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) -; CHECK: renamable $rbx = STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax +; CHECK: renamable $rax = MOV64rm undef renamable $rax, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.0) +; CHECK: STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store 8 on %stack.0) ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.normal_continue: -; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rbx :: (store 8 into %stack.0) ; CHECK: $edi = MOV32ri 10 -; CHECK: STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 2, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK: STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 2, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK: bb.2.exceptional_return (landing-pad): -; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rbx :: (store 8 into %stack.0) ; CHECK: $edi = MOV32ri -271 -; CHECK: STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK: STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp entry: %val1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 %val2 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 From f25089237376dd43c8c37a18ea9d132f0845eda4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 21 Dec 2020 11:12:40 +0000 Subject: [PATCH 011/378] [VPlan] Make VPRecipeBase inherit from VPDef. This patch makes VPRecipeBase a direct subclass of VPDef, moving the SubclassID to VPDef. 
Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D90564 --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 40 +--- llvm/lib/Transforms/Vectorize/VPlan.h | 197 ++++++++---------- llvm/lib/Transforms/Vectorize/VPlanValue.h | 35 +++- .../Transforms/Vectorize/VPlanTest.cpp | 53 +++-- 4 files changed, 169 insertions(+), 156 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index e58b49a64737e..601c406290b28 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -120,50 +120,18 @@ VPUser *VPRecipeBase::toVPUser() { } VPValue *VPRecipeBase::toVPValue() { + if (getNumDefinedValues() == 1) + return getVPValue(); if (auto *V = dyn_cast(this)) return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) { - if (!V->isStore()) - return V->getVPValue(); - else - return nullptr; - } - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; return nullptr; } const VPValue *VPRecipeBase::toVPValue() const { + if (getNumDefinedValues() == 1) + return getVPValue(); if (auto *V = dyn_cast(this)) return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) { - if (!V->isStore()) - return V->getVPValue(); - else - return nullptr; - } - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; - if (auto *V = dyn_cast(this)) - return V; return nullptr; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 37f1e9e73c397..ecb7004121a26 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -620,47 +620,23 @@ class VPBlockBase { }; /// VPRecipeBase is a base class modeling a sequence of one or more output IR -/// instructions. -class VPRecipeBase : public ilist_node_with_parent { +/// instructions. VPRecipeBase owns the the VPValues it defines through VPDef +/// and is responsible for deleting its defined values. Single-value +/// VPRecipeBases that also inherit from VPValue must make sure to inherit from +/// VPRecipeBase before VPValue. +class VPRecipeBase : public ilist_node_with_parent, + public VPDef { friend VPBasicBlock; friend class VPBlockUtils; - const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). /// Each VPRecipe belongs to a single VPBasicBlock. VPBasicBlock *Parent = nullptr; public: - /// An enumeration for keeping track of the concrete subclass of VPRecipeBase - /// that is actually instantiated. Values of this enumeration are kept in the - /// SubclassID field of the VPRecipeBase objects. They are used for concrete - /// type identification. - using VPRecipeTy = enum { - VPBlendSC, - VPBranchOnMaskSC, - VPInstructionSC, - VPInterleaveSC, - VPPredInstPHISC, - VPReductionSC, - VPReplicateSC, - VPWidenCallSC, - VPWidenCanonicalIVSC, - VPWidenGEPSC, - VPWidenIntOrFpInductionSC, - VPWidenMemoryInstructionSC, - VPWidenPHISC, - VPWidenSC, - VPWidenSelectSC - }; - - VPRecipeBase(const unsigned char SC) : SubclassID(SC) {} + VPRecipeBase(const unsigned char SC) : VPDef(SC) {} virtual ~VPRecipeBase() = default; - /// \return an ID for the concrete type of this object. - /// This is used to implement the classof checks. 
This should not be used - /// for any other purpose, as the values may change as LLVM evolves. - unsigned getVPRecipeID() const { return SubclassID; } - /// \return the VPBasicBlock which this VPRecipe belongs to. VPBasicBlock *getParent() { return Parent; } const VPBasicBlock *getParent() const { return Parent; } @@ -718,27 +694,33 @@ class VPRecipeBase : public ilist_node_with_parent { return cast_or_null(VPV->getUnderlyingValue()); return nullptr; } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + // All VPDefs are also VPRecipeBases. + return true; + } }; -inline bool VPUser::classof(const VPRecipeBase *Recipe) { - return Recipe->getVPRecipeID() == VPRecipeBase::VPInstructionSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPWidenCallSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPBlendSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPInterleaveSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPReplicateSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPReductionSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC || - Recipe->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; +inline bool VPUser::classof(const VPDef *Def) { + return Def->getVPDefID() == VPRecipeBase::VPInstructionSC || + Def->getVPDefID() == VPRecipeBase::VPWidenSC || + Def->getVPDefID() == VPRecipeBase::VPWidenCallSC || + Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC || + Def->getVPDefID() == VPRecipeBase::VPWidenGEPSC || + Def->getVPDefID() == VPRecipeBase::VPBlendSC || + Def->getVPDefID() == VPRecipeBase::VPInterleaveSC || + Def->getVPDefID() == VPRecipeBase::VPReplicateSC || + Def->getVPDefID() == VPRecipeBase::VPReductionSC || + Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC || + Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC; } /// This is a concrete Recipe that models a single VPlan-level instruction. /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as /// the VPInstruction is also a single def-use vertex. -class VPInstruction : public VPUser, public VPValue, public VPRecipeBase { +class VPInstruction : public VPValue, public VPUser, public VPRecipeBase { friend class VPlanSlp; public: @@ -764,9 +746,16 @@ class VPInstruction : public VPUser, public VPValue, public VPRecipeBase { public: VPInstruction(unsigned Opcode, ArrayRef Operands) - : VPUser(Operands), VPValue(VPValue::VPVInstructionSC), + : VPValue(VPValue::VPVInstructionSC), VPUser(Operands), VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {} + VPInstruction(unsigned Opcode, ArrayRef Operands) + : VPValue(VPValue::VPVInstructionSC), VPUser({}), + VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) { + for (auto *I : Operands) + addOperand(I->getVPValue()); + } + VPInstruction(unsigned Opcode, std::initializer_list Operands) : VPInstruction(Opcode, ArrayRef(Operands)) {} @@ -781,8 +770,8 @@ class VPInstruction : public VPUser, public VPValue, public VPRecipeBase { } /// Method to support type inquiry through isa, cast, and dyn_cast. 
- static inline bool classof(const VPRecipeBase *R) { - return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC; + static inline bool classof(const VPDef *R) { + return R->getVPDefID() == VPRecipeBase::VPInstructionSC; } unsigned getOpcode() const { return Opcode; } @@ -836,14 +825,14 @@ class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser { public: template VPWidenRecipe(Instruction &I, iterator_range Operands) - : VPRecipeBase(VPRecipeBase::VPWidenSC), VPValue(VPValue::VPVWidenSC, &I), - VPUser(Operands) {} + : VPRecipeBase(VPRecipeBase::VPWidenSC), + VPValue(VPValue::VPVWidenSC, &I, this), VPUser(Operands) {} ~VPWidenRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenSC; } static inline bool classof(const VPValue *V) { return V->getVPValueID() == VPValue::VPVWidenSC; @@ -858,10 +847,7 @@ class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser { }; /// A recipe for widening Call instructions. -class VPWidenCallRecipe : public VPRecipeBase, - public VPDef, - public VPUser, - public VPValue { +class VPWidenCallRecipe : public VPRecipeBase, public VPUser, public VPValue { public: template @@ -872,8 +858,8 @@ class VPWidenCallRecipe : public VPRecipeBase, ~VPWidenCallRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenCallSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenCallSC; } /// Produce a widened version of the call instruction. @@ -885,10 +871,7 @@ class VPWidenCallRecipe : public VPRecipeBase, }; /// A recipe for widening select instructions. -class VPWidenSelectRecipe : public VPRecipeBase, - public VPDef, - public VPUser, - public VPValue { +class VPWidenSelectRecipe : public VPRecipeBase, public VPUser, public VPValue { /// Is the condition of the select loop invariant? bool InvariantCond; @@ -904,8 +887,8 @@ class VPWidenSelectRecipe : public VPRecipeBase, ~VPWidenSelectRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenSelectSC; } /// Produce a widened version of the select instruction. @@ -918,7 +901,6 @@ class VPWidenSelectRecipe : public VPRecipeBase, /// A recipe for handling GEP instructions. class VPWidenGEPRecipe : public VPRecipeBase, - public VPDef, public VPUser, public VPValue { bool IsPtrLoopInvariant; @@ -945,8 +927,8 @@ class VPWidenGEPRecipe : public VPRecipeBase, ~VPWidenGEPRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenGEPSC; } /// Generate the gep nodes. 
@@ -965,12 +947,17 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase { public: VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr) - : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {} + : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) { + if (Trunc) + new VPValue(Trunc, this); + else + new VPValue(IV, this); + } ~VPWidenIntOrFpInductionRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC; } /// Generate the vectorized and scalarized versions of the phi node as @@ -987,12 +974,14 @@ class VPWidenPHIRecipe : public VPRecipeBase { PHINode *Phi; public: - VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {} + VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) { + new VPValue(Phi, this); + } ~VPWidenPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenPHISC; } /// Generate the phi/select nodes. @@ -1014,6 +1003,7 @@ class VPBlendRecipe : public VPRecipeBase, public VPUser { /// might be incoming with a full mask for which there is no VPValue. VPBlendRecipe(PHINode *Phi, ArrayRef Operands) : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) { + new VPValue(Phi, this); assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -1021,8 +1011,8 @@ class VPBlendRecipe : public VPRecipeBase, public VPUser { } /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPBlendSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPBlendSC; } /// Return the number of incoming values, taking into account that a single @@ -1047,7 +1037,7 @@ class VPBlendRecipe : public VPRecipeBase, public VPUser { /// or stores into one wide load/store and shuffles. The first operand of a /// VPInterleave recipe is the address, followed by the stored values, followed /// by an optional mask. -class VPInterleaveRecipe : public VPRecipeBase, public VPDef, public VPUser { +class VPInterleaveRecipe : public VPRecipeBase, public VPUser { const InterleaveGroup *IG; bool HasMask = false; @@ -1073,8 +1063,8 @@ class VPInterleaveRecipe : public VPRecipeBase, public VPDef, public VPUser { ~VPInterleaveRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPInterleaveSC; } /// Return the address accessed by this recipe. @@ -1111,7 +1101,7 @@ class VPInterleaveRecipe : public VPRecipeBase, public VPDef, public VPUser { /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. 
/// The Operands are {ChainOp, VecOp, [Condition]}. -class VPReductionRecipe : public VPRecipeBase, public VPValue, public VPUser { +class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue { /// The recurrence decriptor for the reduction in question. RecurrenceDescriptor *RdxDesc; /// Fast math flags to use for the resulting reduction operation. @@ -1123,9 +1113,9 @@ class VPReductionRecipe : public VPRecipeBase, public VPValue, public VPUser { VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool NoNaN, const TargetTransformInfo *TTI) - : VPRecipeBase(VPRecipeBase::VPReductionSC), - VPValue(VPValue::VPVReductionSC, I), VPUser({ChainOp, VecOp}), - RdxDesc(R), NoNaN(NoNaN), TTI(TTI) { + : VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}), + VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN), + TTI(TTI) { if (CondOp) addOperand(CondOp); } @@ -1136,8 +1126,9 @@ class VPReductionRecipe : public VPRecipeBase, public VPValue, public VPUser { static inline bool classof(const VPValue *V) { return V->getVPValueID() == VPValue::VPVReductionSC; } - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPReductionSC; + + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPReductionSC; } /// Generate the reduction in the loop @@ -1176,7 +1167,7 @@ class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue { VPReplicateRecipe(Instruction *I, iterator_range Operands, bool IsUniform, bool IsPredicated = false) : VPRecipeBase(VPReplicateSC), VPUser(Operands), - VPValue(VPVReplicateSC, I), IsUniform(IsUniform), + VPValue(VPVReplicateSC, I, this), IsUniform(IsUniform), IsPredicated(IsPredicated) { // Retain the previous behavior of predicateInstructions(), where an // insert-element of a predicated instruction got hoisted into the @@ -1189,8 +1180,8 @@ class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue { ~VPReplicateRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPReplicateSC; } static inline bool classof(const VPValue *V) { @@ -1220,8 +1211,8 @@ class VPBranchOnMaskRecipe : public VPRecipeBase, public VPUser { } /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC; } /// Generate the extraction of the appropriate bit from the block mask and the @@ -1259,12 +1250,14 @@ class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser { /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(VPValue *PredV) - : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) {} + : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) { + new VPValue(VPValue::VPValueSC, PredV->getUnderlyingValue(), this); + } ~VPPredInstPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. 
- static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPPredInstPHISC; } /// Generates phi nodes for live-outs as needed to retain SSA form. @@ -1282,7 +1275,6 @@ class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser { /// TODO: We currently execute only per-part unless a specific instance is /// provided. class VPWidenMemoryInstructionRecipe : public VPRecipeBase, - public VPDef, public VPUser { Instruction &Ingredient; @@ -1312,8 +1304,8 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase, } /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC; } /// Return the address accessed by this recipe. @@ -1347,21 +1339,16 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase, /// A Recipe for widening the canonical induction variable of the vector loop. class VPWidenCanonicalIVRecipe : public VPRecipeBase { - /// A VPValue representing the canonical vector IV. - VPValue Val; - public: - VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {} - ~VPWidenCanonicalIVRecipe() override = default; + VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) { + new VPValue(nullptr, this); + } - /// Return the VPValue representing the canonical vector induction variable of - /// the vector loop. - const VPValue *getVPValue() const { return &Val; } - VPValue *getVPValue() { return &Val; } + ~VPWidenCanonicalIVRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC; + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC; } /// Generate a canonical vector induction variable of the vector loop, with diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 609a56d3fa23e..a1adccd5c75cb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -45,6 +45,7 @@ class VPWidenMemoryInstructionRecipe; class VPValue { friend class VPBuilder; friend class VPDef; + friend class VPInstruction; friend struct VPlanTransforms; friend class VPBasicBlock; friend class VPInterleavedAccessInfo; @@ -236,7 +237,7 @@ class VPUser { } /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *Recipe); + static inline bool classof(const VPDef *Recipe); }; /// This class augments a recipe with a set of VPValues defined by the recipe. @@ -247,6 +248,9 @@ class VPUser { class VPDef { friend class VPValue; + /// Subclass identifier (for isa/dyn_cast). + const unsigned char SubclassID; + /// The VPValues defined by this VPDef. TinyPtrVector DefinedValues; @@ -269,6 +273,30 @@ class VPDef { } public: + /// An enumeration for keeping track of the concrete subclass of VPRecipeBase + /// that is actually instantiated. Values of this enumeration are kept in the + /// SubclassID field of the VPRecipeBase objects. They are used for concrete + /// type identification. 
+ using VPRecipeTy = enum { + VPBlendSC, + VPBranchOnMaskSC, + VPInstructionSC, + VPInterleaveSC, + VPPredInstPHISC, + VPReductionSC, + VPReplicateSC, + VPWidenCallSC, + VPWidenCanonicalIVSC, + VPWidenGEPSC, + VPWidenIntOrFpInductionSC, + VPWidenMemoryInstructionSC, + VPWidenPHISC, + VPWidenSC, + VPWidenSelectSC + }; + + VPDef(const unsigned char SC) : SubclassID(SC) {} + virtual ~VPDef() { for (VPValue *D : make_early_inc_range(DefinedValues)) { assert(D->Def == this && @@ -295,6 +323,11 @@ class VPDef { /// Returns the number of values defined by the VPDef. unsigned getNumDefinedValues() const { return DefinedValues.size(); } + + /// \return an ID for the concrete type of this object. + /// This is used to implement the classof checks. This should not be used + /// for any other purpose, as the values may change as LLVM evolves. + unsigned getVPDefID() const { return SubclassID; } }; class VPlan; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 4864d52ad01e8..c0230774bd907 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -365,7 +365,7 @@ TEST(VPRecipeTest, CastVPInstructionToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); - EXPECT_EQ(&Recipe, BaseR->toVPUser()); + EXPECT_EQ(&Recipe, BaseR); } TEST(VPRecipeTest, CastVPWidenRecipeToVPUser) { @@ -383,11 +383,11 @@ TEST(VPRecipeTest, CastVPWidenRecipeToVPUser) { EXPECT_TRUE(isa(&WidenR)); VPRecipeBase *WidenRBase = &WidenR; EXPECT_TRUE(isa(WidenRBase)); - EXPECT_EQ(&WidenR, WidenRBase->toVPUser()); + EXPECT_EQ(&WidenR, WidenRBase); delete AI; } -TEST(VPRecipeTest, CastVPWidenCallRecipeToVPUser) { +TEST(VPRecipeTest, CastVPWidenCallRecipeToVPUserAndVPDef) { LLVMContext C; IntegerType *Int32 = IntegerType::get(C, 32); @@ -402,11 +402,16 @@ TEST(VPRecipeTest, CastVPWidenCallRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); - EXPECT_EQ(&Recipe, BaseR->toVPUser()); + EXPECT_EQ(&Recipe, BaseR); + + VPValue *VPV = &Recipe; + EXPECT_TRUE(isa(VPV->getDef())); + EXPECT_EQ(&Recipe, dyn_cast(VPV->getDef())); + delete Call; } -TEST(VPRecipeTest, CastVPWidenSelectRecipeToVPUser) { +TEST(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) { LLVMContext C; IntegerType *Int1 = IntegerType::get(C, 1); @@ -425,11 +430,16 @@ TEST(VPRecipeTest, CastVPWidenSelectRecipeToVPUser) { EXPECT_TRUE(isa(&WidenSelectR)); VPRecipeBase *BaseR = &WidenSelectR; EXPECT_TRUE(isa(BaseR)); - EXPECT_EQ(&WidenSelectR, BaseR->toVPUser()); + EXPECT_EQ(&WidenSelectR, BaseR); + + VPValue *VPV = &WidenSelectR; + EXPECT_TRUE(isa(VPV->getDef())); + EXPECT_EQ(&WidenSelectR, dyn_cast(VPV->getDef())); + delete SelectI; } -TEST(VPRecipeTest, CastVPWidenGEPRecipeToVPUser) { +TEST(VPRecipeTest, CastVPWidenGEPRecipeToVPUserAndVPDef) { LLVMContext C; IntegerType *Int32 = IntegerType::get(C, 32); @@ -445,7 +455,12 @@ TEST(VPRecipeTest, CastVPWidenGEPRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); - EXPECT_EQ(&Recipe, BaseR->toVPUser()); + EXPECT_EQ(&Recipe, BaseR); + + VPValue *VPV = &Recipe; + EXPECT_TRUE(isa(VPV->getDef())); + EXPECT_EQ(&Recipe, dyn_cast(VPV->getDef())); + delete GEP; } @@ -476,7 +491,7 @@ TEST(VPRecipeTest, CastVPInterleaveRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); - EXPECT_EQ(&Recipe, BaseR->toVPUser()); + EXPECT_EQ(&Recipe, 
BaseR); } TEST(VPRecipeTest, CastVPReplicateRecipeToVPUser) { @@ -503,10 +518,10 @@ TEST(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); - EXPECT_EQ(&Recipe, BaseR->toVPUser()); + EXPECT_EQ(&Recipe, BaseR); } -TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUser) { +TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUserAndVPDef) { LLVMContext C; IntegerType *Int32 = IntegerType::get(C, 32); @@ -519,7 +534,12 @@ TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); - EXPECT_EQ(&Recipe, BaseR->toVPUser()); + EXPECT_EQ(&Recipe, BaseR); + + VPValue *VPV = Recipe.getVPValue(); + EXPECT_TRUE(isa(VPV->getDef())); + EXPECT_EQ(&Recipe, dyn_cast(VPV->getDef())); + delete Load; } @@ -536,11 +556,16 @@ TEST(VPRecipeTest, CastVPReductionRecipeToVPUser) { EXPECT_TRUE(isa(BaseR)); } -struct VPDoubleValueDef : public VPUser, public VPDef { - VPDoubleValueDef(ArrayRef Operands) : VPUser(Operands), VPDef() { +struct VPDoubleValueDef : public VPRecipeBase, public VPUser { + VPDoubleValueDef(ArrayRef Operands) + : VPRecipeBase(99), VPUser(Operands) { new VPValue(nullptr, this); new VPValue(nullptr, this); } + + void execute(struct VPTransformState &State) override{}; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override {} }; TEST(VPDoubleValueDefTest, traverseUseLists) { From 8c2ad9e85f677546021880dc88e24f633ccacd93 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Sat, 19 Dec 2020 01:12:25 +0900 Subject: [PATCH 012/378] [VE] Correct VMP allocation in calling conv VE used to allocate VM1, VM2, VMP2 (VM4+VM5), and VM3. This patch corrects to allocate VM1, VM2, VMP2 (VM4+VM5), and VM6. Also add a regression test. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D93570 --- llvm/lib/Target/VE/VECallingConv.td | 4 ++-- llvm/test/CodeGen/VE/Vector/fastcc_callee.ll | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td index 6741d1cc8eaf2..93899c2cae3d1 100644 --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -116,7 +116,7 @@ def CC_VE_Fast : CallingConv<[ // pair of vector mask --> generic vector mask registers CCIfType<[v512i1], CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], - [VM1, VM1, VM3]>>, + [VM1, VM3, VM5]>>, // Follow the standard C CC for scalars. CCDelegateTo @@ -137,7 +137,7 @@ def RetCC_VE_Fast : CallingConv<[ // pair of vector mask --> generic vector mask registers CCIfType<[v512i1], CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], - [VM1, VM1, VM3]>>, + [VM1, VM3, VM5]>>, // Follow the standard C CC for scalars. 
CCDelegateTo diff --git a/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll b/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll index c0ad247d0e745..aa7b4944e7e44 100644 --- a/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll +++ b/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll @@ -137,3 +137,11 @@ define fastcc <512 x i1> @vreg_arg_v512i1_vmp3(<512 x i1> %vmp1, <512 x i1> %vmp ; CHECK-NEXT: b.l.t (, %s10) ret <512 x i1> %vmp3 } + +define fastcc <256 x i1> @vmp_cc_bug(<256 x i1> %vm1, <256 x i1> %vm2, <512 x i1> %vmp2, <256 x i1> %vm6) { +; CHECK-LABEL: vmp_cc_bug: +; CHECK: # %bb.0: +; CHECK-NEXT: andm %vm1, %vm0, %vm6 +; CHECK-NEXT: b.l.t (, %s10) + ret <256 x i1> %vm6 +} From d6118759f30e343a05aab053f66e5049ea149175 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 21 Dec 2020 08:26:55 -0500 Subject: [PATCH 013/378] [InstSimplify] add tests for inverted logic operands; NFC --- llvm/test/Transforms/InstSimplify/AndOrXor.ll | 336 ++++++++++++------ 1 file changed, 227 insertions(+), 109 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll index 8952acc2feb64..9e549ebefc6b0 100644 --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -885,168 +885,286 @@ define i32 @reversed_not(i32 %a) { define i64 @shl_or_and1(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and1( -; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: ret i64 [[TMP2]] +; CHECK-NEXT: [[T2:%.*]] = zext i1 [[B:%.*]] to i64 +; CHECK-NEXT: ret i64 [[T2]] ; - %tmp1 = zext i32 %a to i64 - %tmp2 = zext i1 %b to i64 - %tmp3 = shl nuw i64 %tmp1, 32 - %tmp4 = or i64 %tmp2, %tmp3 - %tmp5 = and i64 %tmp4, 1 - ret i64 %tmp5 + %t1 = zext i32 %a to i64 + %t2 = zext i1 %b to i64 + %t3 = shl nuw i64 %t1, 32 + %t4 = or i64 %t2, %t3 + %t5 = and i64 %t4, 1 + ret i64 %t5 } define i64 @shl_or_and2(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and2( -; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[T1:%.*]] = zext i1 [[B:%.*]] to i64 +; CHECK-NEXT: [[T3:%.*]] = shl nuw i64 [[T1]], 32 +; CHECK-NEXT: ret i64 [[T3]] ; - %tmp1 = zext i1 %b to i64 - %tmp2 = zext i32 %a to i64 - %tmp3 = shl nuw i64 %tmp1, 32 - %tmp4 = or i64 %tmp2, %tmp3 - %tmp5 = and i64 %tmp4, 4294967296 - ret i64 %tmp5 + %t1 = zext i1 %b to i64 + %t2 = zext i32 %a to i64 + %t3 = shl nuw i64 %t1, 32 + %t4 = or i64 %t2, %t3 + %t5 = and i64 %t4, 4294967296 + ret i64 %t5 } ; concatenate two 32-bit integers and extract lower 32-bit define i64 @shl_or_and3(i32 %a, i32 %b) { ; CHECK-LABEL: @shl_or_and3( -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT: ret i64 [[TMP2]] +; CHECK-NEXT: [[T2:%.*]] = zext i32 [[B:%.*]] to i64 +; CHECK-NEXT: ret i64 [[T2]] ; - %tmp1 = zext i32 %a to i64 - %tmp2 = zext i32 %b to i64 - %tmp3 = shl nuw i64 %tmp1, 32 - %tmp4 = or i64 %tmp2, %tmp3 - %tmp5 = and i64 %tmp4, 4294967295 - ret i64 %tmp5 + %t1 = zext i32 %a to i64 + %t2 = zext i32 %b to i64 + %t3 = shl nuw i64 %t1, 32 + %t4 = or i64 %t2, %t3 + %t5 = and i64 %t4, 4294967295 + ret i64 %t5 } ; concatenate two 16-bit integers and extract higher 16-bit define i32 @shl_or_and4(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and4( -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[A:%.*]] to i32 +; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[T1]], 16 +; 
CHECK-NEXT: ret i32 [[T3]] ; - %tmp1 = zext i16 %a to i32 - %tmp2 = zext i16 %b to i32 - %tmp3 = shl nuw i32 %tmp1, 16 - %tmp4 = or i32 %tmp2, %tmp3 - %tmp5 = and i32 %tmp4, 4294901760 ; mask with 0xFFFF0000 - ret i32 %tmp5 + %t1 = zext i16 %a to i32 + %t2 = zext i16 %b to i32 + %t3 = shl nuw i32 %t1, 16 + %t4 = or i32 %t2, %t3 + %t5 = and i32 %t4, 4294901760 ; mask with 0xFFFF0000 + ret i32 %t5 } define i128 @shl_or_and5(i64 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and5( -; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128 -; CHECK-NEXT: ret i128 [[TMP2]] +; CHECK-NEXT: [[T2:%.*]] = zext i1 [[B:%.*]] to i128 +; CHECK-NEXT: ret i128 [[T2]] ; - %tmp1 = zext i64 %a to i128 - %tmp2 = zext i1 %b to i128 - %tmp3 = shl nuw i128 %tmp1, 64 - %tmp4 = or i128 %tmp2, %tmp3 - %tmp5 = and i128 %tmp4, 1 - ret i128 %tmp5 + %t1 = zext i64 %a to i128 + %t2 = zext i1 %b to i128 + %t3 = shl nuw i128 %t1, 64 + %t4 = or i128 %t2, %t3 + %t5 = and i128 %t4, 1 + ret i128 %t5 } ; A variation of above test cases; it fails due to the mask value define i32 @shl_or_and6(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and6( -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -65535 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[A:%.*]] to i32 +; CHECK-NEXT: [[T2:%.*]] = zext i16 [[B:%.*]] to i32 +; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[T1]], 16 +; CHECK-NEXT: [[T4:%.*]] = or i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], -65535 +; CHECK-NEXT: ret i32 [[T5]] ; - %tmp1 = zext i16 %a to i32 - %tmp2 = zext i16 %b to i32 - %tmp3 = shl nuw i32 %tmp1, 16 - %tmp4 = or i32 %tmp2, %tmp3 - %tmp5 = and i32 %tmp4, 4294901761 ; mask with 0xFFFF0001 - ret i32 %tmp5 + %t1 = zext i16 %a to i32 + %t2 = zext i16 %b to i32 + %t3 = shl nuw i32 %t1, 16 + %t4 = or i32 %t2, %t3 + %t5 = and i32 %t4, 4294901761 ; mask with 0xFFFF0001 + ret i32 %t5 } ; A variation of above test cases; it fails due to the mask value define i32 @shl_or_and7(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and7( -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -131072 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[A:%.*]] to i32 +; CHECK-NEXT: [[T2:%.*]] = zext i16 [[B:%.*]] to i32 +; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[T1]], 16 +; CHECK-NEXT: [[T4:%.*]] = or i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], -131072 +; CHECK-NEXT: ret i32 [[T5]] ; - %tmp1 = zext i16 %a to i32 - %tmp2 = zext i16 %b to i32 - %tmp3 = shl nuw i32 %tmp1, 16 - %tmp4 = or i32 %tmp2, %tmp3 - %tmp5 = and i32 %tmp4, 4294836224 ; mask with 0xFFFE0000 - ret i32 %tmp5 + %t1 = zext i16 %a to i32 + %t2 = zext i16 %b to i32 + %t3 = shl nuw i32 %t1, 16 + %t4 = or i32 %t2, %t3 + %t5 = and i32 %t4, 4294836224 ; mask with 0xFFFE0000 + ret i32 %t5 } ; A variation of above test cases; it fails due to the mask value define i32 @shl_or_and8(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and8( -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; 
CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 131071 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[A:%.*]] to i32 +; CHECK-NEXT: [[T2:%.*]] = zext i16 [[B:%.*]] to i32 +; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[T1]], 16 +; CHECK-NEXT: [[T4:%.*]] = or i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], 131071 +; CHECK-NEXT: ret i32 [[T5]] ; - %tmp1 = zext i16 %a to i32 - %tmp2 = zext i16 %b to i32 - %tmp3 = shl nuw i32 %tmp1, 16 - %tmp4 = or i32 %tmp2, %tmp3 - %tmp5 = and i32 %tmp4, 131071 ; mask with 0x1FFFF - ret i32 %tmp5 + %t1 = zext i16 %a to i32 + %t2 = zext i16 %b to i32 + %t3 = shl nuw i32 %t1, 16 + %t4 = or i32 %t2, %t3 + %t5 = and i32 %t4, 131071 ; mask with 0x1FFFF + ret i32 %t5 } define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and1v( -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[TMP2]] +; CHECK-NEXT: [[T2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[T2]] ; - %tmp1 = zext <2 x i32> %a to <2 x i64> - %tmp2 = zext <2 x i1> %b to <2 x i64> - %tmp3 = shl nuw <2 x i64> %tmp1, - %tmp4 = or <2 x i64> %tmp3, %tmp2 - %tmp5 = and <2 x i64> %tmp4, - ret <2 x i64> %tmp5 + %t1 = zext <2 x i32> %a to <2 x i64> + %t2 = zext <2 x i1> %b to <2 x i64> + %t3 = shl nuw <2 x i64> %t1, + %t4 = or <2 x i64> %t3, %t2 + %t5 = and <2 x i64> %t4, + ret <2 x i64> %t5 } define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and2v( -; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], -; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], +; CHECK-NEXT: ret <2 x i64> [[T3]] ; - %tmp1 = zext <2 x i1> %b to <2 x i64> - %tmp2 = zext <2 x i32> %a to <2 x i64> - %tmp3 = shl nuw <2 x i64> %tmp1, - %tmp4 = or <2 x i64> %tmp2, %tmp3 - %tmp5 = and <2 x i64> %tmp4, - ret <2 x i64> %tmp5 + %t1 = zext <2 x i1> %b to <2 x i64> + %t2 = zext <2 x i32> %a to <2 x i64> + %t3 = shl nuw <2 x i64> %t1, + %t4 = or <2 x i64> %t2, %t3 + %t5 = and <2 x i64> %t4, + ret <2 x i64> %t5 } -define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) { ; A variation of above test case, but fails due to the mask value +define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) { ; CHECK-LABEL: @shl_or_and3v( -; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: ret <2 x i32> [[TMP5]] -; - %tmp1 = zext <2 x i16> %a to <2 x i32> - %tmp2 = zext <2 x i16> %b to <2 x i32> - %tmp3 = shl nuw <2 x i32> %tmp1, - %tmp4 = or <2 x i32> %tmp2, %tmp3 - %tmp5 = and <2 x i32> %tmp4, ; mask with 0xFFFF0001 - ret <2 x i32> %tmp5 +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32> +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i32> [[T1]], +; CHECK-NEXT: [[T4:%.*]] = or <2 x i32> [[T2]], [[T3]] +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], +; CHECK-NEXT: ret <2 x i32> [[T5]] +; + %t1 = zext <2 x i16> %a to <2 x i32> + %t2 = zext <2 x i16> %b to <2 x i32> + %t3 = shl nuw <2 x i32> %t1, + %t4 = or <2 x i32> %t2, %t3 + %t5 = and <2 x i32> %t4, ; mask with 0xFFFF0001 + ret <2 
x i32> %t5 +} + +define i8 @and_add_sub(i8 %x) { +; CHECK-LABEL: @and_add_sub( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], -1 +; CHECK-NEXT: [[S:%.*]] = sub i8 0, [[X]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[S]] +; CHECK-NEXT: ret i8 [[R]] +; + %a = add i8 %x, -1 + %s = sub i8 0, %x + %r = and i8 %a, %s + ret i8 %r +} + +define <2 x i8> @and_sub_add(<2 x i8> %x) { +; CHECK-LABEL: @and_sub_add( +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> , [[X]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[S]], [[A]] +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %a = add <2 x i8> %x, + %s = sub <2 x i8> , %x + %r = and <2 x i8> %s, %a + ret <2 x i8> %r +} + +define i89 @or_add_sub(i89 %x) { +; CHECK-LABEL: @or_add_sub( +; CHECK-NEXT: [[A:%.*]] = add i89 [[X:%.*]], 5 +; CHECK-NEXT: [[S:%.*]] = sub i89 -6, [[X]] +; CHECK-NEXT: [[R:%.*]] = or i89 [[A]], [[S]] +; CHECK-NEXT: ret i89 [[R]] +; + %a = add i89 %x, 5 + %s = sub i89 -6, %x + %r = or i89 %a, %s + ret i89 %r +} + +define <3 x i8> @or_sub_add(<3 x i8> %x) { +; CHECK-LABEL: @or_sub_add( +; CHECK-NEXT: [[A:%.*]] = add <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[S:%.*]] = sub <3 x i8> , [[X]] +; CHECK-NEXT: [[R:%.*]] = or <3 x i8> [[S]], [[A]] +; CHECK-NEXT: ret <3 x i8> [[R]] +; + %a = add <3 x i8> %x, + %s = sub <3 x i8> , %x + %r = or <3 x i8> %s, %a + ret <3 x i8> %r +} + + +define <2 x i17> @xor_add_sub(<2 x i17> %x) { +; CHECK-LABEL: @xor_add_sub( +; CHECK-NEXT: [[A:%.*]] = add <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[S:%.*]] = sub <2 x i17> , [[X]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i17> [[A]], [[S]] +; CHECK-NEXT: ret <2 x i17> [[R]] +; + %a = add <2 x i17> %x, + %s = sub <2 x i17> , %x + %r = xor <2 x i17> %a, %s + ret <2 x i17> %r +} + +define i8 @xor_sub_add(i8 %x) { +; CHECK-LABEL: @xor_sub_add( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 33 +; CHECK-NEXT: [[S:%.*]] = sub i8 -34, [[X]] +; CHECK-NEXT: [[R:%.*]] = xor i8 [[S]], [[A]] +; CHECK-NEXT: ret i8 [[R]] +; + %a = add i8 %x, 33 + %s = sub i8 -34, %x + %r = xor i8 %s, %a + ret i8 %r +} + +define i8 @and_add_sub_wrong_const(i8 %x) { +; CHECK-LABEL: @and_add_sub_wrong_const( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 6 +; CHECK-NEXT: [[S:%.*]] = sub i8 -6, [[X]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[S]] +; CHECK-NEXT: ret i8 [[R]] +; + %a = add i8 %x, 6 + %s = sub i8 -6, %x + %r = and i8 %a, %s + ret i8 %r +} + +define i8 @or_add_sub_wrong_var(i8 %x, i8 %y) { +; CHECK-LABEL: @or_add_sub_wrong_var( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 5 +; CHECK-NEXT: [[S:%.*]] = sub i8 -6, [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[S]] +; CHECK-NEXT: ret i8 [[R]] +; + %a = add i8 %x, 5 + %s = sub i8 -6, %y + %r = or i8 %a, %s + ret i8 %r +} + +define i8 @xor_add_sub_wrong_op(i8 %x) { +; CHECK-LABEL: @xor_add_sub_wrong_op( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 5 +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X]], -6 +; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[S]] +; CHECK-NEXT: ret i8 [[R]] +; + %a = add i8 %x, 5 + %s = sub i8 %x, -6 + %r = xor i8 %a, %s + ret i8 %r } From 38ca7face67e8488d482b66a999d0a685806879f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 21 Dec 2020 08:48:04 -0500 Subject: [PATCH 014/378] [InstSimplify] reduce logic with inverted add/sub ops https://llvm.org/PR48559 This could be part of a larger ValueTracking API, but I don't see that currently. 
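The folds below hold because the two operands are bitwise complements of
each other: using the two's-complement identity ~A == -A - 1, the
precondition C1 == ~C2 gives C2 - %x == ~C1 - %x == -(%x + C1) - 1 == ~(%x + C1),
so and/or/xor of (%x + C1) with (C2 - %x) simplifies to 0 / -1 / -1.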
https://rise4fun.com/Alive/gR0 Name: and Pre: C1 == ~C2 %sub = add i8 %x, C1 %sub1 = sub i8 C2, %x %r = and i8 %sub, %sub1 => %r = 0 Name: or Pre: C1 == ~C2 %sub = add i8 %x, C1 %sub1 = sub i8 C2, %x %r = or i8 %sub, %sub1 => %r = -1 Name: xor Pre: C1 == ~C2 %sub = add i8 %x, C1 %sub1 = sub i8 C2, %x %r = xor i8 %sub, %sub1 => %r = -1 --- llvm/lib/Analysis/InstructionSimplify.cpp | 33 +++++++++++++++++ llvm/test/Transforms/InstSimplify/AndOrXor.ll | 36 +++++++------------ 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 55f3bc4f29236..27b73a5a82366 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1999,6 +1999,30 @@ static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0, return NotOp1; } +/// Given a bitwise logic op, check if the operands are add/sub with a common +/// source value and inverted constant (identity: C - X -> ~(X + ~C)). +static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1, + Instruction::BinaryOps Opcode) { + assert(Op0->getType() == Op1->getType() && "Mismatched binop types"); + assert(BinaryOperator::isBitwiseLogicOp(Opcode) && "Expected logic op"); + Value *X; + Constant *C1, *C2; + if ((match(Op0, m_Add(m_Value(X), m_Constant(C1))) && + match(Op1, m_Sub(m_Constant(C2), m_Specific(X)))) || + (match(Op1, m_Add(m_Value(X), m_Constant(C1))) && + match(Op0, m_Sub(m_Constant(C2), m_Specific(X))))) { + if (ConstantExpr::getNot(C1) == C2) { + // (X + C) & (~C - X) --> (X + C) & ~(X + C) --> 0 + // (X + C) | (~C - X) --> (X + C) | ~(X + C) --> -1 + // (X + C) ^ (~C - X) --> (X + C) ^ ~(X + C) --> -1 + Type *Ty = Op0->getType(); + return Opcode == Instruction::And ? ConstantInt::getNullValue(Ty) + : ConstantInt::getAllOnesValue(Ty); + } + } + return nullptr; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -2035,6 +2059,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (match(Op1, m_c_Or(m_Specific(Op0), m_Value()))) return Op0; + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And)) + return V; + // A mask that only clears known zeros of a shifted value is a no-op. Value *X; const APInt *Mask; @@ -2194,6 +2221,9 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value())))) return Constant::getAllOnesValue(Op0->getType()); + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or)) + return V; + Value *A, *B; // (A & ~B) | (A ^ B) -> (A ^ B) // (~B & A) | (A ^ B) -> (A ^ B) @@ -2323,6 +2353,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Op0->getType()); + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor)) + return V; + // Try some generic simplifications for associative operations. 
if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse)) diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll index 9e549ebefc6b0..e23262835c3cf 100644 --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -1053,10 +1053,7 @@ define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) { define i8 @and_add_sub(i8 %x) { ; CHECK-LABEL: @and_add_sub( -; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], -1 -; CHECK-NEXT: [[S:%.*]] = sub i8 0, [[X]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[S]] -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %a = add i8 %x, -1 %s = sub i8 0, %x @@ -1066,10 +1063,7 @@ define i8 @and_add_sub(i8 %x) { define <2 x i8> @and_sub_add(<2 x i8> %x) { ; CHECK-LABEL: @and_sub_add( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> , [[X]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[S]], [[A]] -; CHECK-NEXT: ret <2 x i8> [[R]] +; CHECK-NEXT: ret <2 x i8> zeroinitializer ; %a = add <2 x i8> %x, %s = sub <2 x i8> , %x @@ -1079,10 +1073,7 @@ define <2 x i8> @and_sub_add(<2 x i8> %x) { define i89 @or_add_sub(i89 %x) { ; CHECK-LABEL: @or_add_sub( -; CHECK-NEXT: [[A:%.*]] = add i89 [[X:%.*]], 5 -; CHECK-NEXT: [[S:%.*]] = sub i89 -6, [[X]] -; CHECK-NEXT: [[R:%.*]] = or i89 [[A]], [[S]] -; CHECK-NEXT: ret i89 [[R]] +; CHECK-NEXT: ret i89 -1 ; %a = add i89 %x, 5 %s = sub i89 -6, %x @@ -1092,10 +1083,7 @@ define i89 @or_add_sub(i89 %x) { define <3 x i8> @or_sub_add(<3 x i8> %x) { ; CHECK-LABEL: @or_sub_add( -; CHECK-NEXT: [[A:%.*]] = add <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[S:%.*]] = sub <3 x i8> , [[X]] -; CHECK-NEXT: [[R:%.*]] = or <3 x i8> [[S]], [[A]] -; CHECK-NEXT: ret <3 x i8> [[R]] +; CHECK-NEXT: ret <3 x i8> ; %a = add <3 x i8> %x, %s = sub <3 x i8> , %x @@ -1106,10 +1094,7 @@ define <3 x i8> @or_sub_add(<3 x i8> %x) { define <2 x i17> @xor_add_sub(<2 x i17> %x) { ; CHECK-LABEL: @xor_add_sub( -; CHECK-NEXT: [[A:%.*]] = add <2 x i17> [[X:%.*]], -; CHECK-NEXT: [[S:%.*]] = sub <2 x i17> , [[X]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i17> [[A]], [[S]] -; CHECK-NEXT: ret <2 x i17> [[R]] +; CHECK-NEXT: ret <2 x i17> ; %a = add <2 x i17> %x, %s = sub <2 x i17> , %x @@ -1119,10 +1104,7 @@ define <2 x i17> @xor_add_sub(<2 x i17> %x) { define i8 @xor_sub_add(i8 %x) { ; CHECK-LABEL: @xor_sub_add( -; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 33 -; CHECK-NEXT: [[S:%.*]] = sub i8 -34, [[X]] -; CHECK-NEXT: [[R:%.*]] = xor i8 [[S]], [[A]] -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 -1 ; %a = add i8 %x, 33 %s = sub i8 -34, %x @@ -1130,6 +1112,8 @@ define i8 @xor_sub_add(i8 %x) { ret i8 %r } +; Negative test + define i8 @and_add_sub_wrong_const(i8 %x) { ; CHECK-LABEL: @and_add_sub_wrong_const( ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 6 @@ -1143,6 +1127,8 @@ define i8 @and_add_sub_wrong_const(i8 %x) { ret i8 %r } +; Negative test + define i8 @or_add_sub_wrong_var(i8 %x, i8 %y) { ; CHECK-LABEL: @or_add_sub_wrong_var( ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 5 @@ -1156,6 +1142,8 @@ define i8 @or_add_sub_wrong_var(i8 %x, i8 %y) { ret i8 %r } +; Negative test + define i8 @xor_add_sub_wrong_op(i8 %x) { ; CHECK-LABEL: @xor_add_sub_wrong_op( ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 5 From 3a675c777dd5788e2313cb06fb27b01f8a2e7573 Mon Sep 17 00:00:00 2001 From: "Paul C. 
Anagnostopoulos" Date: Wed, 16 Dec 2020 09:55:16 -0500 Subject: [PATCH 015/378] [TableGen] Add the !substr() bang operator Update the documentation and add a test. Differential Revision: https://reviews.llvm.org/D93419 --- llvm/docs/TableGen/ProgRef.rst | 10 ++- llvm/include/llvm/TableGen/Record.h | 2 +- llvm/lib/TableGen/Record.cpp | 28 ++++++++- llvm/lib/TableGen/TGLexer.cpp | 1 + llvm/lib/TableGen/TGLexer.h | 6 +- llvm/lib/TableGen/TGParser.cpp | 95 ++++++++++++++++++++++++++++- llvm/lib/TableGen/TGParser.h | 1 + llvm/test/TableGen/substr.td | 81 ++++++++++++++++++++++++ 8 files changed, 215 insertions(+), 9 deletions(-) create mode 100644 llvm/test/TableGen/substr.td diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index 342b91a0c437b..f2ee7a7e549a8 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -216,7 +216,8 @@ TableGen provides "bang operators" that have a wide variety of uses: : !interleave !isa !le !listconcat !listsplat : !lt !mul !ne !not !or : !setdagop !shl !size !sra !srl - : !strconcat !sub !subst !tail !xor + : !strconcat !sub !subst !substr !tail + : !xor The ``!cond`` operator has a slightly different syntax compared to other bang operators, so it is defined separately: @@ -1723,6 +1724,13 @@ and non-0 as true. record if the *target* record name equals the *value* record name; otherwise it produces the *value*. +``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)`` + This operator extracts a substring of the given *string*. The starting + position of the substring is specified by *start*, which can range + between 0 and the length of the string. The length of the substring + is specified by *length*; if not specified, the rest of the string is + extracted. The *start* and *length* arguments must be integers. + ``!tail(``\ *a*\ ``)`` This operator produces a new list with all the elements of the list *a* except for the zeroth one. (See also ``!head``.) diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 3010b4dad09a8..a0c5b2778547d 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -829,7 +829,7 @@ class BinOpInit : public OpInit, public FoldingSetNode { /// !op (X, Y, Z) - Combine two inits. 
class TernOpInit : public OpInit, public FoldingSetNode { public: - enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG }; + enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR }; private: Init *LHS, *MHS, *RHS; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index cbdce04494f37..9c0464d4e1bf6 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -1325,6 +1325,27 @@ Init *TernOpInit::Fold(Record *CurRec) const { } break; } + + case SUBSTR: { + StringInit *LHSs = dyn_cast(LHS); + IntInit *MHSi = dyn_cast(MHS); + IntInit *RHSi = dyn_cast(RHS); + if (LHSs && MHSi && RHSi) { + int64_t StringSize = LHSs->getValue().size(); + int64_t Start = MHSi->getValue(); + int64_t Length = RHSi->getValue(); + if (Start < 0 || Start > StringSize) + PrintError(CurRec->getLoc(), + Twine("!substr start position is out of range 0...") + + std::to_string(StringSize) + ": " + + std::to_string(Start)); + if (Length < 0) + PrintError(CurRec->getLoc(), "!substr length must be nonnegative"); + return StringInit::get(LHSs->getValue().substr(Start, Length), + LHSs->getFormat()); + } + break; + } } return const_cast(this); @@ -1364,11 +1385,12 @@ std::string TernOpInit::getAsString() const { std::string Result; bool UnquotedLHS = false; switch (getOpcode()) { - case SUBST: Result = "!subst"; break; - case FOREACH: Result = "!foreach"; UnquotedLHS = true; break; + case DAG: Result = "!dag"; break; case FILTER: Result = "!filter"; UnquotedLHS = true; break; + case FOREACH: Result = "!foreach"; UnquotedLHS = true; break; case IF: Result = "!if"; break; - case DAG: Result = "!dag"; break; + case SUBST: Result = "!subst"; break; + case SUBSTR: Result = "!substr"; break; } return (Result + "(" + (UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) + diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index df0df96f40eb7..a45ef6dc10c16 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -589,6 +589,7 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("listsplat", tgtok::XListSplat) .Case("strconcat", tgtok::XStrConcat) .Case("interleave", tgtok::XInterleave) + .Case("substr", tgtok::XSubstr) .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated. .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated. .Default(tgtok::Error); diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 1856bef3ea9bd..ee568849ca887 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -53,9 +53,9 @@ namespace tgtok { // Bang operators. XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL, - XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach, - XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, - XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, + XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast, + XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, + XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, // Boolean literals. 
TrueVal, FalseVal, diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 2671d29a72721..7308cf1b4924e 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -1496,6 +1496,10 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec); } + case tgtok::XSubstr: { + return ParseOperationSubstr(CurRec, ItemType); + } + case tgtok::XCond: return ParseOperationCond(CurRec, ItemType); @@ -1655,6 +1659,94 @@ RecTy *TGParser::ParseOperatorType() { return Type; } +/// Parse the !substr operation. Return null on error. +/// +/// Substr ::= !substr(string, start-int [, length-int]) => string +Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) { + TernOpInit::TernaryOp Code = TernOpInit::SUBSTR; + RecTy *Type = StringRecTy::get(); + + Lex.Lex(); // eat the operation + + if (!consume(tgtok::l_paren)) { + TokError("expected '(' after !substr operator"); + return nullptr; + } + + Init *LHS = ParseValue(CurRec); + if (!LHS) + return nullptr; + + if (!consume(tgtok::comma)) { + TokError("expected ',' in !substr operator"); + return nullptr; + } + + SMLoc MHSLoc = Lex.getLoc(); + Init *MHS = ParseValue(CurRec); + if (!MHS) + return nullptr; + + SMLoc RHSLoc = Lex.getLoc(); + Init *RHS; + if (consume(tgtok::comma)) { + RHSLoc = Lex.getLoc(); + RHS = ParseValue(CurRec); + if (!RHS) + return nullptr; + } else { + RHS = IntInit::get(SIZE_MAX); + } + + if (!consume(tgtok::r_paren)) { + TokError("expected ')' in !substr operator"); + return nullptr; + } + + if (ItemType && !Type->typeIsConvertibleTo(ItemType)) { + Error(RHSLoc, Twine("expected value of type '") + + ItemType->getAsString() + "', got '" + + Type->getAsString() + "'"); + } + + TypedInit *LHSt = dyn_cast(LHS); + if (!LHSt && !isa(LHS)) { + TokError("could not determine type of the string in !substr"); + return nullptr; + } + if (LHSt && !isa(LHSt->getType())) { + TokError(Twine("expected string, got type '") + + LHSt->getType()->getAsString() + "'"); + return nullptr; + } + + TypedInit *MHSt = dyn_cast(MHS); + if (!MHSt && !isa(MHS)) { + TokError("could not determine type of the start position in !substr"); + return nullptr; + } + if (MHSt && !isa(MHSt->getType())) { + Error(MHSLoc, Twine("expected int, got type '") + + MHSt->getType()->getAsString() + "'"); + return nullptr; + } + + if (RHS) { + TypedInit *RHSt = dyn_cast(RHS); + if (!RHSt && !isa(RHS)) { + TokError("could not determine type of the length in !substr"); + return nullptr; + } + if (RHSt && !isa(RHSt->getType())) { + TokError(Twine("expected int, got type '") + + RHSt->getType()->getAsString() + "'"); + return nullptr; + } + } + + return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec); +} + /// Parse the !foreach and !filter operations. Return null on error. 
/// /// ForEach ::= !foreach(ID, list-or-dag, expr) => list @@ -2206,7 +2298,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XFoldl: case tgtok::XForEach: case tgtok::XFilter: - case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' + case tgtok::XSubst: + case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' return ParseOperation(CurRec, ItemType); } } diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h index bdeb4d35382b2..3ed78a23067ff 100644 --- a/llvm/lib/TableGen/TGParser.h +++ b/llvm/lib/TableGen/TGParser.h @@ -254,6 +254,7 @@ class TGParser { TypedInit *FirstItem = nullptr); RecTy *ParseType(); Init *ParseOperation(Record *CurRec, RecTy *ItemType); + Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType); Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType); Init *ParseOperationCond(Record *CurRec, RecTy *ItemType); RecTy *ParseOperatorType(); diff --git a/llvm/test/TableGen/substr.td b/llvm/test/TableGen/substr.td new file mode 100644 index 0000000000000..5efe4ce69215e --- /dev/null +++ b/llvm/test/TableGen/substr.td @@ -0,0 +1,81 @@ +// RUN: llvm-tblgen %s | FileCheck %s +// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s + +defvar claim = "This is the end of the world!"; + +// CHECK: def Rec1 +// CHECK: fullNoLength = "This is the end of the world!"; +// CHECK: fullLength = "This is the end of the world!"; +// CHECK: thisIsTheEnd = "This is the end"; +// CHECK: DoorsSong = "the end"; +// CHECK: finalNoLength = "end of the world!"; +// CHECK: finalLength = "end of the world!"; + +def Rec1 { + string fullNoLength = !substr(claim, 0); + string fullLength = !substr(claim, 0, 999); + string thisIsTheEnd = !substr(claim, 0, 15); + string DoorsSong = !substr(claim, 8, 7); + string finalNoLength = !substr(claim, 12); + string finalLength = !substr(claim, 12, !sub(!size(claim), 12)); +} + +// CHECK: def Rec2 { +// CHECK: lastName = "Flintstone"; + +def Rec2 { + string firstName = "Fred"; + string name = firstName # " " # "Flintstone"; + string lastName = !substr(name, !add(!size(firstName), 1)); +} + +// CHECK: def Rec3 { +// CHECK: test1 = ""; +// CHECK: test2 = ""; +// CHECK: test3 = ""; +// CHECK: test4 = "h"; +// CHECK: test5 = "hello"; +// CHECK: test6 = ""; + +def Rec3 { + string test1 = !substr("", 0, 0); + string test2 = !substr("", 0, 9); + string test3 = !substr("hello", 0, 0); + string test4 = !substr("hello", 0, 1); + string test5 = !substr("hello", 0, 99); + string test6 = !substr("hello", 5, 99); +} + +// CHECK: def Rec4 +// CHECK: message = "This is the end of the world!"; +// CHECK: messagePrefix = "This is th..."; +// CHECK: warning = "Bad message: 'This is th...'"; + +class C { + string message = msg; + string messagePrefix = !substr(message, 0, 10) # "..."; +} + +def Rec4 : C { + string warning = "Bad message: '" # messagePrefix # "'"; +} + +#ifdef ERROR1 + +// ERROR1: expected string, got type 'int' +// ERROR1: expected int, got type 'bits<3>' +// ERROR1: expected int, got type 'string' +// ERROR1: !substr start position is out of range 0...29: 30 +// ERROR1: !substr length must be nonnegative + +def Rec8 { + string claim1 = !substr(42, 0, 3); + string claim2 = !substr(claim, 0b101); + string claim3 = !substr(claim, 0, "oops"); +} + +def Rec9 { + string claim1 = !substr(claim, !add(!size(claim), 1)); + string claim2 = !substr(claim, 0, -13); +} +#endif From 88c5b5006064d62cae4592e66f5bc8b7a7326ef2 Mon Sep 17 00:00:00 2001 
From: Simon Pilgrim Date: Mon, 21 Dec 2020 15:22:27 +0000 Subject: [PATCH 016/378] [AggressiveInstCombine] Generalize foldGuardedRotateToFunnelShift to generic funnel shifts (REAPPLIED) The fold currently only handles rotation patterns, but with the maturation of backend funnel shift handling we can now realistically handle all funnel shift patterns. This should allow us to begin resolving PR46896 et al. Ensure we block poison in a funnel shift value - similar to rG0fe91ad463fea9d08cbcd640a62aa9ca2d8d05e0 Reapplied with fix for PR48068 - we weren't checking that the shift values could be hoisted from their basicblocks. Differential Revision: https://reviews.llvm.org/D90625 --- .../AggressiveInstCombine.cpp | 71 +++++++---- .../AggressiveInstCombine/funnel.ll | 118 ++++++++++-------- .../AggressiveInstCombine/rotate.ll | 11 +- 3 files changed, 122 insertions(+), 78 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index e7fb699d9fda2..a7ae10d156d57 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -39,6 +40,8 @@ using namespace PatternMatch; STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded"); STATISTIC(NumGuardedRotates, "Number of guarded rotates transformed into funnel shifts"); +STATISTIC(NumGuardedFunnelShifts, + "Number of guarded funnel shifts transformed into funnel shifts"); STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized"); namespace { @@ -67,17 +70,17 @@ class AggressiveInstCombinerLegacyPass : public FunctionPass { }; } // namespace -/// Match a pattern for a bitwise rotate operation that partially guards -/// against undefined behavior by branching around the rotation when the shift -/// amount is 0. -static bool foldGuardedRotateToFunnelShift(Instruction &I) { +/// Match a pattern for a bitwise funnel/rotate operation that partially guards +/// against undefined behavior by branching around the funnel-shift/rotation +/// when the shift amount is 0. +static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) { if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2) return false; // As with the one-use checks below, this is not strictly necessary, but we // are being cautious to avoid potential perf regressions on targets that - // do not actually have a rotate instruction (where the funnel shift would be - // expanded back into math/shift/logic ops). + // do not actually have a funnel/rotate instruction (where the funnel shift + // would be expanded back into math/shift/logic ops). 
if (!isPowerOf2_32(I.getType()->getScalarSizeInBits())) return false; @@ -111,30 +114,41 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) { return Intrinsic::not_intrinsic; }; - // One phi operand must be a rotate operation, and the other phi operand must - // be the source value of that rotate operation: + // One phi operand must be a funnel/rotate operation, and the other phi + // operand must be the source value of that funnel/rotate operation: // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ] + // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ] + // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ] PHINode &Phi = cast(I); unsigned FunnelOp = 0, GuardOp = 1; Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1); Value *ShVal0, *ShVal1, *ShAmt; Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt); - if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P1) { + if (IID == Intrinsic::not_intrinsic || + (IID == Intrinsic::fshl && ShVal0 != P1) || + (IID == Intrinsic::fshr && ShVal1 != P1)) { IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt); - if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P0) + if (IID == Intrinsic::not_intrinsic || + (IID == Intrinsic::fshl && ShVal0 != P0) || + (IID == Intrinsic::fshr && ShVal1 != P0)) return false; assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && "Pattern must match funnel shift left or right"); std::swap(FunnelOp, GuardOp); } - assert(ShVal0 == ShVal1 && "Rotation funnel shift pattern expected"); // The incoming block with our source operand must be the "guard" block. - // That must contain a cmp+branch to avoid the rotate when the shift amount - // is equal to 0. The other incoming block is the block with the rotate. + // That must contain a cmp+branch to avoid the funnel/rotate when the shift + // amount is equal to 0. The other incoming block is the block with the + // funnel/rotate. BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp); BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp); Instruction *TermI = GuardBB->getTerminator(); + + // Ensure that the shift values dominate each block. + if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI)) + return false; + ICmpInst::Predicate Pred; BasicBlock *PhiBB = Phi.getParent(); if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()), @@ -144,24 +158,39 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) { if (Pred != CmpInst::ICMP_EQ) return false; + IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt()); + + if (ShVal0 == ShVal1) + ++NumGuardedRotates; + else + ++NumGuardedFunnelShifts; + + // If this is not a rotate then the select was blocking poison from the + // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it. 
+ bool IsFshl = IID == Intrinsic::fshl; + if (ShVal0 != ShVal1) { + if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1)) + ShVal1 = Builder.CreateFreeze(ShVal1); + else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0)) + ShVal0 = Builder.CreateFreeze(ShVal0); + } + // We matched a variation of this IR pattern: // GuardBB: // %cmp = icmp eq i32 %ShAmt, 0 // br i1 %cmp, label %PhiBB, label %FunnelBB // FunnelBB: // %sub = sub i32 32, %ShAmt - // %shr = lshr i32 %RotSrc, %sub - // %shl = shl i32 %RotSrc, %ShAmt - // %rot = or i32 %shr, %shl + // %shr = lshr i32 %ShVal1, %sub + // %shl = shl i32 %ShVal0, %ShAmt + // %fsh = or i32 %shr, %shl // br label %PhiBB // PhiBB: - // %cond = phi i32 [ %RotSrc, %FunnelBB ], [ %RotSrc, %GuardBB ] + // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ] // --> - // llvm.fshl.i32(i32 %RotSrc, i32 %RotSrc, i32 %ShAmt) - IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt()); + // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt) Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType()); Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt})); - ++NumGuardedRotates; return true; } @@ -350,7 +379,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { // iteratively in this loop rather than waiting until the end. for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { MadeChange |= foldAnyOrAllBitsSet(I); - MadeChange |= foldGuardedRotateToFunnelShift(I); + MadeChange |= foldGuardedFunnelShift(I, DT); MadeChange |= tryToRecognizePopCount(I); } } diff --git a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll index 545b8e1759827..1322e95920d20 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll @@ -7,14 +7,11 @@ define i32 @fshl(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %c, 0 @@ -38,14 +35,11 @@ define i32 @fshl_commute_phi(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %c, 0 @@ -69,14 +63,11 @@ define i32 @fshl_commute_or(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 
[[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %c, 0 @@ -102,15 +93,12 @@ define i32 @fshl_insert_valid_location(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY]] ] -; CHECK-NEXT: [[RES:%.*]] = or i32 [[COND]], [[OTHER]] +; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]]) +; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP1]], [[OTHER]] ; CHECK-NEXT: ret i32 [[RES]] ; entry: @@ -137,14 +125,11 @@ define i32 @fshr(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[B]], [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %c, 0 @@ -168,14 +153,11 @@ define i32 @fshr_commute_phi(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %c, 0 @@ -199,14 +181,11 @@ define i32 @fshr_commute_or(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: 
[[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %c, 0 @@ -396,7 +375,7 @@ end: ret i32 %cond } -; Negative test - wrong shift. +; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift). define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @not_fshr_5( @@ -404,14 +383,11 @@ define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]] ; CHECK: fshbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[C]], [[SUB]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %c, 0 @@ -500,3 +476,45 @@ end: ret i32 %cond } +; PR48068 - Ensure we don't fold a funnel shift that depends on a shift value that +; can't be hoisted out of a basic block. +@a = global i32 0, align 4 +declare i32 @i(...) +declare i32 @f(...) + +define i32 @PR48068() { +; CHECK-LABEL: @PR48068( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @i to i32 ()*)() +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[CALL]], [[TMP0]] +; CHECK-NEXT: [[CALL_I:%.*]] = call i32 bitcast (i32 (...)* @f to i32 ()*)() +; CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i32 32, [[TMP0]] +; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[CALL_I]], [[SUB_I]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR_I]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[H_0:%.*]] = phi i32 [ [[OR]], [[IF_THEN]] ], [ [[CALL]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[H_0]] +; +entry: + %call = call i32 bitcast (i32 (...)* @i to i32 ()*)() + %0 = load i32, i32* @a, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %shl = shl i32 %call, %0 + %call.i = call i32 bitcast (i32 (...)* @f to i32 ()*)() + %sub.i = sub nsw i32 32, %0 + %shr.i = lshr i32 %call.i, %sub.i + %or = or i32 %shl, %shr.i + br label %if.end + +if.end: ; preds = %if.then, %entry + %h.0 = phi i32 [ %or, %if.then ], [ %call, %entry ] + ret i32 %h.0 +} diff --git a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll index e47fa9be6f83e..0abc6c574603c 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll @@ -370,7 +370,7 @@ end: ret i32 %cond } -; Negative test - wrong shift. 
+; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift). define i32 @not_rotr_5(i32 %a, i32 %b) { ; CHECK-LABEL: @not_rotr_5( @@ -378,14 +378,11 @@ define i32 @not_rotr_5(i32 %a, i32 %b) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]] ; CHECK: rotbb: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[A:%.*]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp = icmp eq i32 %b, 0 From d56982b6f5fb17fb1fa50b31ab4b67b8d3a76c24 Mon Sep 17 00:00:00 2001 From: Michael Forster Date: Mon, 21 Dec 2020 16:21:31 +0100 Subject: [PATCH 017/378] Remove unused variables. Differential Revision: https://reviews.llvm.org/D93635 --- llvm/lib/Transforms/IPO/IROutliner.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index 4031eedced7c1..c879031faf5ae 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -429,8 +429,7 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region, // It is not a constant, check if it is a sunken alloca. If it is not, // create the mapping from extracted to overall. If it is, create the // mapping of the index to the value. - unsigned Found = ArgInputs.count(Input); - assert(Found && "Input cannot be found!"); + assert(ArgInputs.count(Input) && "Input cannot be found!"); Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex)); Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex)); @@ -475,7 +474,6 @@ void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region) { /// \returns a call instruction with the replaced function. CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) { std::vector NewCallArgs; - DenseMap::iterator ArgPair; OutlinableGroup &Group = *Region.Parent; CallInst *Call = Region.Call; From 554eb1f6dc49e616b70254a7976699b3eff84366 Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Mon, 21 Dec 2020 10:45:30 -0500 Subject: [PATCH 018/378] Revert "[TableGen] Add the !substr() bang operator" This reverts commit 3a675c777dd5788e2313cb06fb27b01f8a2e7573. 
--- llvm/docs/TableGen/ProgRef.rst | 10 +-- llvm/include/llvm/TableGen/Record.h | 2 +- llvm/lib/TableGen/Record.cpp | 28 +-------- llvm/lib/TableGen/TGLexer.cpp | 1 - llvm/lib/TableGen/TGLexer.h | 6 +- llvm/lib/TableGen/TGParser.cpp | 95 +---------------------------- llvm/lib/TableGen/TGParser.h | 1 - llvm/test/TableGen/substr.td | 81 ------------------------ 8 files changed, 9 insertions(+), 215 deletions(-) delete mode 100644 llvm/test/TableGen/substr.td diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index f2ee7a7e549a8..342b91a0c437b 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -216,8 +216,7 @@ TableGen provides "bang operators" that have a wide variety of uses: : !interleave !isa !le !listconcat !listsplat : !lt !mul !ne !not !or : !setdagop !shl !size !sra !srl - : !strconcat !sub !subst !substr !tail - : !xor + : !strconcat !sub !subst !tail !xor The ``!cond`` operator has a slightly different syntax compared to other bang operators, so it is defined separately: @@ -1724,13 +1723,6 @@ and non-0 as true. record if the *target* record name equals the *value* record name; otherwise it produces the *value*. -``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)`` - This operator extracts a substring of the given *string*. The starting - position of the substring is specified by *start*, which can range - between 0 and the length of the string. The length of the substring - is specified by *length*; if not specified, the rest of the string is - extracted. The *start* and *length* arguments must be integers. - ``!tail(``\ *a*\ ``)`` This operator produces a new list with all the elements of the list *a* except for the zeroth one. (See also ``!head``.) diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index a0c5b2778547d..3010b4dad09a8 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -829,7 +829,7 @@ class BinOpInit : public OpInit, public FoldingSetNode { /// !op (X, Y, Z) - Combine two inits. 
class TernOpInit : public OpInit, public FoldingSetNode { public: - enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR }; + enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG }; private: Init *LHS, *MHS, *RHS; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 9c0464d4e1bf6..cbdce04494f37 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -1325,27 +1325,6 @@ Init *TernOpInit::Fold(Record *CurRec) const { } break; } - - case SUBSTR: { - StringInit *LHSs = dyn_cast(LHS); - IntInit *MHSi = dyn_cast(MHS); - IntInit *RHSi = dyn_cast(RHS); - if (LHSs && MHSi && RHSi) { - int64_t StringSize = LHSs->getValue().size(); - int64_t Start = MHSi->getValue(); - int64_t Length = RHSi->getValue(); - if (Start < 0 || Start > StringSize) - PrintError(CurRec->getLoc(), - Twine("!substr start position is out of range 0...") + - std::to_string(StringSize) + ": " + - std::to_string(Start)); - if (Length < 0) - PrintError(CurRec->getLoc(), "!substr length must be nonnegative"); - return StringInit::get(LHSs->getValue().substr(Start, Length), - LHSs->getFormat()); - } - break; - } } return const_cast(this); @@ -1385,12 +1364,11 @@ std::string TernOpInit::getAsString() const { std::string Result; bool UnquotedLHS = false; switch (getOpcode()) { - case DAG: Result = "!dag"; break; - case FILTER: Result = "!filter"; UnquotedLHS = true; break; + case SUBST: Result = "!subst"; break; case FOREACH: Result = "!foreach"; UnquotedLHS = true; break; + case FILTER: Result = "!filter"; UnquotedLHS = true; break; case IF: Result = "!if"; break; - case SUBST: Result = "!subst"; break; - case SUBSTR: Result = "!substr"; break; + case DAG: Result = "!dag"; break; } return (Result + "(" + (UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) + diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index a45ef6dc10c16..df0df96f40eb7 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -589,7 +589,6 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("listsplat", tgtok::XListSplat) .Case("strconcat", tgtok::XStrConcat) .Case("interleave", tgtok::XInterleave) - .Case("substr", tgtok::XSubstr) .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated. .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated. .Default(tgtok::Error); diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index ee568849ca887..1856bef3ea9bd 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -53,9 +53,9 @@ namespace tgtok { // Bang operators. XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL, - XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast, - XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, - XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, + XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach, + XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, + XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, // Boolean literals. 
TrueVal, FalseVal, diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 7308cf1b4924e..2671d29a72721 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -1496,10 +1496,6 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec); } - case tgtok::XSubstr: { - return ParseOperationSubstr(CurRec, ItemType); - } - case tgtok::XCond: return ParseOperationCond(CurRec, ItemType); @@ -1659,94 +1655,6 @@ RecTy *TGParser::ParseOperatorType() { return Type; } -/// Parse the !substr operation. Return null on error. -/// -/// Substr ::= !substr(string, start-int [, length-int]) => string -Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) { - TernOpInit::TernaryOp Code = TernOpInit::SUBSTR; - RecTy *Type = StringRecTy::get(); - - Lex.Lex(); // eat the operation - - if (!consume(tgtok::l_paren)) { - TokError("expected '(' after !substr operator"); - return nullptr; - } - - Init *LHS = ParseValue(CurRec); - if (!LHS) - return nullptr; - - if (!consume(tgtok::comma)) { - TokError("expected ',' in !substr operator"); - return nullptr; - } - - SMLoc MHSLoc = Lex.getLoc(); - Init *MHS = ParseValue(CurRec); - if (!MHS) - return nullptr; - - SMLoc RHSLoc = Lex.getLoc(); - Init *RHS; - if (consume(tgtok::comma)) { - RHSLoc = Lex.getLoc(); - RHS = ParseValue(CurRec); - if (!RHS) - return nullptr; - } else { - RHS = IntInit::get(SIZE_MAX); - } - - if (!consume(tgtok::r_paren)) { - TokError("expected ')' in !substr operator"); - return nullptr; - } - - if (ItemType && !Type->typeIsConvertibleTo(ItemType)) { - Error(RHSLoc, Twine("expected value of type '") + - ItemType->getAsString() + "', got '" + - Type->getAsString() + "'"); - } - - TypedInit *LHSt = dyn_cast(LHS); - if (!LHSt && !isa(LHS)) { - TokError("could not determine type of the string in !substr"); - return nullptr; - } - if (LHSt && !isa(LHSt->getType())) { - TokError(Twine("expected string, got type '") + - LHSt->getType()->getAsString() + "'"); - return nullptr; - } - - TypedInit *MHSt = dyn_cast(MHS); - if (!MHSt && !isa(MHS)) { - TokError("could not determine type of the start position in !substr"); - return nullptr; - } - if (MHSt && !isa(MHSt->getType())) { - Error(MHSLoc, Twine("expected int, got type '") + - MHSt->getType()->getAsString() + "'"); - return nullptr; - } - - if (RHS) { - TypedInit *RHSt = dyn_cast(RHS); - if (!RHSt && !isa(RHS)) { - TokError("could not determine type of the length in !substr"); - return nullptr; - } - if (RHSt && !isa(RHSt->getType())) { - TokError(Twine("expected int, got type '") + - RHSt->getType()->getAsString() + "'"); - return nullptr; - } - } - - return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec); -} - /// Parse the !foreach and !filter operations. Return null on error. 
/// /// ForEach ::= !foreach(ID, list-or-dag, expr) => list @@ -2298,8 +2206,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XFoldl: case tgtok::XForEach: case tgtok::XFilter: - case tgtok::XSubst: - case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' + case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' return ParseOperation(CurRec, ItemType); } } diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h index 3ed78a23067ff..bdeb4d35382b2 100644 --- a/llvm/lib/TableGen/TGParser.h +++ b/llvm/lib/TableGen/TGParser.h @@ -254,7 +254,6 @@ class TGParser { TypedInit *FirstItem = nullptr); RecTy *ParseType(); Init *ParseOperation(Record *CurRec, RecTy *ItemType); - Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType); Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType); Init *ParseOperationCond(Record *CurRec, RecTy *ItemType); RecTy *ParseOperatorType(); diff --git a/llvm/test/TableGen/substr.td b/llvm/test/TableGen/substr.td deleted file mode 100644 index 5efe4ce69215e..0000000000000 --- a/llvm/test/TableGen/substr.td +++ /dev/null @@ -1,81 +0,0 @@ -// RUN: llvm-tblgen %s | FileCheck %s -// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s - -defvar claim = "This is the end of the world!"; - -// CHECK: def Rec1 -// CHECK: fullNoLength = "This is the end of the world!"; -// CHECK: fullLength = "This is the end of the world!"; -// CHECK: thisIsTheEnd = "This is the end"; -// CHECK: DoorsSong = "the end"; -// CHECK: finalNoLength = "end of the world!"; -// CHECK: finalLength = "end of the world!"; - -def Rec1 { - string fullNoLength = !substr(claim, 0); - string fullLength = !substr(claim, 0, 999); - string thisIsTheEnd = !substr(claim, 0, 15); - string DoorsSong = !substr(claim, 8, 7); - string finalNoLength = !substr(claim, 12); - string finalLength = !substr(claim, 12, !sub(!size(claim), 12)); -} - -// CHECK: def Rec2 { -// CHECK: lastName = "Flintstone"; - -def Rec2 { - string firstName = "Fred"; - string name = firstName # " " # "Flintstone"; - string lastName = !substr(name, !add(!size(firstName), 1)); -} - -// CHECK: def Rec3 { -// CHECK: test1 = ""; -// CHECK: test2 = ""; -// CHECK: test3 = ""; -// CHECK: test4 = "h"; -// CHECK: test5 = "hello"; -// CHECK: test6 = ""; - -def Rec3 { - string test1 = !substr("", 0, 0); - string test2 = !substr("", 0, 9); - string test3 = !substr("hello", 0, 0); - string test4 = !substr("hello", 0, 1); - string test5 = !substr("hello", 0, 99); - string test6 = !substr("hello", 5, 99); -} - -// CHECK: def Rec4 -// CHECK: message = "This is the end of the world!"; -// CHECK: messagePrefix = "This is th..."; -// CHECK: warning = "Bad message: 'This is th...'"; - -class C { - string message = msg; - string messagePrefix = !substr(message, 0, 10) # "..."; -} - -def Rec4 : C { - string warning = "Bad message: '" # messagePrefix # "'"; -} - -#ifdef ERROR1 - -// ERROR1: expected string, got type 'int' -// ERROR1: expected int, got type 'bits<3>' -// ERROR1: expected int, got type 'string' -// ERROR1: !substr start position is out of range 0...29: 30 -// ERROR1: !substr length must be nonnegative - -def Rec8 { - string claim1 = !substr(42, 0, 3); - string claim2 = !substr(claim, 0b101); - string claim3 = !substr(claim, 0, "oops"); -} - -def Rec9 { - string claim1 = !substr(claim, !add(!size(claim), 1)); - string claim2 = !substr(claim, 0, -13); -} -#endif From e25afcfa51abbd63ddbe943913af1ba61161ab28 Mon Sep 17 00:00:00 
2001
From: Fangrui Song
Date: Mon, 21 Dec 2020 08:45:41 -0800
Subject: [PATCH 019/378] [ELF][PPC64] Detect missing R_PPC64_TLSGD/R_PPC64_TLSLD and disable TLS relaxation

Alternative to D91611.

The TLS General Dynamic/Local Dynamic code sequences need to mark
`__tls_get_addr` with R_PPC64_TLSGD or R_PPC64_TLSLD, e.g.

```
addis r3, r2, x@got@tlsgd@ha # R_PPC64_GOT_TLSGD16_HA
addi r3, r3, x@got@tlsgd@l # R_PPC64_GOT_TLSGD16_LO
bl __tls_get_addr(x@tlsgd) # R_PPC64_TLSGD followed by R_PPC64_REL24
nop
```

However, there are two deviations from the above:

1. direct call to `__tls_get_addr`. This is essential to implement ld.so in
glibc/musl/FreeBSD.

```
bl __tls_get_addr
nop
```

This is only used in a -shared link, and thus not subject to the GD/LD to
IE/LE relaxation issue below.

2. Missing R_PPC64_TLSGD/R_PPC64_TLSLD for compiler generated TLS references

According to Stefan Pintille, "In the early days of the transition from the
ELFv1 ABI that is used for big endian PowerPC Linux distributions to the
ELFv2 ABI that is used for little endian PowerPC Linux distributions, there
was some ambiguity in the specification of the relocations for TLS. The GNU
linker has implemented support for correct handling of calls to
__tls_get_addr with a missing relocation. Unfortunately, we didn't notice
that the IBM XL compiler did not handle TLS according to the updated ABI
until we tried linking XL compiled libraries with LLD."

In short, LLD needs to work around the old IBM XL compiler issue. Otherwise,
if the object file is linked in -no-pie or -pie mode, the result will be
incorrect because the 4 instructions are partially rewritten (the latter 2
are not changed).

Work around the compiler bug by disabling General Dynamic/Local Dynamic to
Initial Exec/Local Exec relaxation. Note, we also disable Initial Exec to
Local Exec relaxation for implementation simplicity, though technically it
can be kept.

ppc64-tls-missing-gdld.s demonstrates the updated behavior.

Reviewed By: #powerpc, stefanp, grimar

Differential Revision: https://reviews.llvm.org/D92959
---
 lld/ELF/InputFiles.h | 4 ++
 lld/ELF/Relocations.cpp | 46 ++++++++++++++-
 lld/test/ELF/ppc64-tls-missing-gdld.s | 85 ++++++++++++++++++++++++---
 3 files changed, 126 insertions(+), 9 deletions(-)

diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 8c898280b85c4..7ffe4c29cb878 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -130,6 +130,10 @@ class InputFile {
   // [.got, .got + 0xFFFC].
   bool ppc64SmallCodeModelTocRelocs = false;

+  // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or
+  // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation.
+  bool ppc64DisableTLSRelax = false;
+
   // groupId is used for --warn-backrefs which is an optional error
   // checking feature. All files within the same --{start,end}-group or
   // --{start,end}-lib get the same group ID. Otherwise, each file gets a new
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 18151c064c355..a9f627a080570 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -208,9 +208,13 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c,
     return 1;
   }

+  // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For
+  // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
+  // relaxation as well.
bool toExecRelax = !config->shared && config->emachine != EM_ARM && config->emachine != EM_HEXAGON && - config->emachine != EM_RISCV; + config->emachine != EM_RISCV && + !c.file->ppc64DisableTLSRelax; // If we are producing an executable and the symbol is non-preemptable, it // must be defined and the code sequence can be relaxed to use Local-Exec. @@ -1527,6 +1531,43 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, processRelocAux(sec, expr, type, offset, sym, rel, addend); } +// R_PPC64_TLSGD/R_PPC64_TLSLD is required to mark `bl __tls_get_addr` for +// General Dynamic/Local Dynamic code sequences. If a GD/LD GOT relocation is +// found but no R_PPC64_TLSGD/R_PPC64_TLSLD is seen, we assume that the +// instructions are generated by very old IBM XL compilers. Work around the +// issue by disabling GD/LD to IE/LE relaxation. +template +static void checkPPC64TLSRelax(InputSectionBase &sec, ArrayRef rels) { + // Skip if sec is synthetic (sec.file is null) or if sec has been marked. + if (!sec.file || sec.file->ppc64DisableTLSRelax) + return; + bool hasGDLD = false; + for (const RelTy &rel : rels) { + RelType type = rel.getType(false); + switch (type) { + case R_PPC64_TLSGD: + case R_PPC64_TLSLD: + return; // Found a marker + case R_PPC64_GOT_TLSGD16: + case R_PPC64_GOT_TLSGD16_HA: + case R_PPC64_GOT_TLSGD16_HI: + case R_PPC64_GOT_TLSGD16_LO: + case R_PPC64_GOT_TLSLD16: + case R_PPC64_GOT_TLSLD16_HA: + case R_PPC64_GOT_TLSLD16_HI: + case R_PPC64_GOT_TLSLD16_LO: + hasGDLD = true; + break; + } + } + if (hasGDLD) { + sec.file->ppc64DisableTLSRelax = true; + warn(toString(sec.file) + + ": disable TLS relaxation due to R_PPC64_GOT_TLS* relocations without " + "R_PPC64_TLSGD/R_PPC64_TLSLD relocations"); + } +} + template static void scanRelocs(InputSectionBase &sec, ArrayRef rels) { OffsetGetter getOffset(sec); @@ -1534,6 +1575,9 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef rels) { // Not all relocations end up in Sec.Relocations, but a lot do. sec.relocations.reserve(rels.size()); + if (config->emachine == EM_PPC64) + checkPPC64TLSRelax(sec, rels); + for (auto i = rels.begin(), end = rels.end(); i != end;) scanReloc(sec, getOffset, i, rels.begin(), end); diff --git a/lld/test/ELF/ppc64-tls-missing-gdld.s b/lld/test/ELF/ppc64-tls-missing-gdld.s index fcc1a2c21a25d..a37a45c11cae5 100644 --- a/lld/test/ELF/ppc64-tls-missing-gdld.s +++ b/lld/test/ELF/ppc64-tls-missing-gdld.s @@ -1,37 +1,106 @@ # REQUIRES: ppc -# RUN: llvm-mc --triple=powerpc64le %s --filetype=obj -o %t1.o -# RUN: llvm-mc --triple=powerpc64 %s --filetype=obj -o %t2.o -# RUN: ld.lld --shared --fatal-warnings %t1.o -o /dev/null -# RUN: ld.lld --shared --fatal-warnings %t2.o -o /dev/null +# RUN: split-file %s %t +# RUN: llvm-mc --triple=ppc64le %t/a.s --filetype=obj -o %t/a.o +# RUN: llvm-mc --triple=ppc64le %t/b.s --filetype=obj -o %t/b.o +# RUN: llvm-mc --triple=ppc64le %t/tga.s --filetype=obj -o %t/tga.o ## User code can call __tls_get_addr by specifying the tls_index parameter. ## We need to allow R_PPC64_REL24/R_PPC64_REL24_NOTOC referencing __tls_get_addr ## without a pairing R_PPC64_TLSGD/R_PPC64_TLSLD. +# RUN: ld.lld --shared --fatal-warnings %t/b.o -o /dev/null +## Warn missing R_PPC64_TLSGD/R_PPC64_TLSLD. 
+# RUN: ld.lld --shared %t/a.o -o %t.so 2>&1 | FileCheck %s --check-prefix=WARN +# RUN: llvm-objdump -d --no-leading-addr %t.so | FileCheck %s --check-prefix=DIS + +# RUN: ld.lld %t/a.o %t/tga.o -o %t2 2>&1 | FileCheck %s --check-prefix=WARN +# RUN: llvm-readelf -x .got %t2 | FileCheck %s --check-prefix=HEX +# RUN: llvm-objdump -d --no-leading-addr %t2 | FileCheck %s --check-prefix=DIS + +# WARN: warning: {{.*}}.o: disable TLS relaxation due to R_PPC64_GOT_TLS* relocations without R_PPC64_TLSGD/R_PPC64_TLSLD relocations + +## .got+0: x is local - relaxed to LE - its DTPMOD/DTPREL slots are link-time constants. +## DTPMOD is 1. DTPREL is st_value-0x8000 = -0x8000. +## .got+16: DTPMOD/DTPREL for _TLS_MODULE_BASE_ is 1 and 0, respectively. +## .got+32: TPOFFSET for x = st_value-0x7000 +# HEX: section '.got': +# HEX-NEXT: [[#%x,IGNORE:]] 01000000 00000000 0080ffff ffffffff +# HEX-NEXT: [[#%x,IGNORE:]] 01000000 00000000 00000000 00000000 +# HEX-NEXT: [[#%x,IGNORE:]] 0090ffff ffffffff + +## .TOC.-32768 = (.got+0x8000)-32768 = .got +# DIS-LABEL: : +# DIS-NEXT: addis 3, 2, 0 +# DIS-NEXT: addi 3, 3, -32768 +# DIS-NEXT: bl [[#%x,TGA:]] +# DIS-LABEL: : +# DIS-NEXT: addis 3, 2, 0 +# DIS-NEXT: addi 3, 3, -32768 +# DIS-NEXT: bl [[#TGA]] + +## LocalDynamic references _TLS_MODULE_BASE_. +## .TOC.-32752 = (.got+0x8000)-32752 = .got+16 +# DIS-LABEL: : +# DIS-NEXT: addis 3, 2, 0 +# DIS-NEXT: addi 3, 3, -32752 +# DIS-NEXT: bl [[#TGA]] +# DIS-LABEL: : +# DIS-NEXT: addis 3, 2, 0 +# DIS-NEXT: addi 3, 3, -32752 +# DIS-NEXT: bl [[#TGA]] + +## Technically we don't have to disable IE to LE relaxation, +## but disabling it for implementation simplicity does not hurt. +# DIS-LABEL: : +# DIS-NEXT: addis 3, 2, 0 +# DIS-NEXT: ld 3, -32736(3) +# DIS-NEXT: add 3, 3, 13 + +#--- a.s GeneralDynamic: addis 3, 2, x@got@tlsgd@ha addi 3, 3, x@got@tlsgd@l bl __tls_get_addr - blr + nop GeneralDynamic_NOTOC: addis 3, 2, x@got@tlsgd@ha addi 3, 3, x@got@tlsgd@l bl __tls_get_addr@notoc - blr + nop LocalDynamic: addis 3, 2, x@got@tlsld@ha addi 3, 3, x@got@tlsld@l bl __tls_get_addr - blr + nop LocalDynamic_NOTOC: addis 3, 2, x@got@tlsld@ha addi 3, 3, x@got@tlsld@l bl __tls_get_addr@notoc - blr + nop + +InitialExec: + addis 3, 2, x@got@tprel@ha + ld 3, x@got@tprel@l(3) + add 3, 3, x@tls +.globl _start +_start: + +.section .tbss,"awT",@nobits +.globl x +x: + .quad 0 + +#--- b.s CallOnly: bl __tls_get_addr + nop + blr + +#--- tga.s +.globl __tls_get_addr +__tls_get_addr: blr From fb3c1b3de5ce7342438d7451f01a14f9c52323cd Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 21 Dec 2020 08:47:04 -0800 Subject: [PATCH 020/378] [ELF] Reject local-exec TLS relocations for -shared For x86-64, D33100 added a diagnostic for local-exec TLS relocations referencing a preemptible symbol. This patch generalizes it to non-preemptible symbols (see `-Bsymbolic` in `tls.s`) on all targets. Local-exec TLS relocations resolve to offsets relative to a fixed point within the static TLS block, which are only meaningful for the executable. With this change, `clang -fpic -shared -fuse-ld=bfd a.c` on the following example will be flagged for AArch64/ARM/i386/x86-64/RISC-V ``` static __attribute__((tls_model("local-exec"))) __thread long TlsVar = 42; long bump() { return ++TlsVar; } ``` Note, in GNU ld, at least arm, riscv and x86's ports have the similar diagnostics, but aarch64 and ppc64 do not error. 
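For reference, a small sketch (not part of this patch) of how such a translation
unit can keep linking with -shared: if local-exec is not forced, -fpic picks a
TLS model that is legal in a shared object (general/local-dynamic by default, or
initial-exec, which this diagnostic does not reject and which typically just
marks the output with DF_STATIC_TLS). The names below are only illustrative.

```
// Sketch under the assumptions above; compiled with -fpic, this links fine with -shared.
static __thread long TlsVar = 42;  // default -fpic model: general- or local-dynamic

// Initial-exec also remains acceptable for -shared links:
// static __attribute__((tls_model("initial-exec"))) __thread long TlsVar = 42;

long bump() { return ++TlsVar; }
```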
Differential Revision: https://reviews.llvm.org/D93331 --- lld/ELF/Relocations.cpp | 15 ++++-- lld/test/ELF/Inputs/i386-static-tls-model4.s | 9 ---- lld/test/ELF/aarch64-tls-le.s | 8 +++ lld/test/ELF/arm-tls-le32.s | 7 +++ lld/test/ELF/i386-static-tls-model.s | 8 --- lld/test/ELF/i386-tls-le.s | 57 ++++++++------------ lld/test/ELF/i386-zrel-zrela.s | 8 +-- lld/test/ELF/mips-tls-hilo.s | 6 ++- lld/test/ELF/ppc64-local-exec-tls.s | 12 +++++ lld/test/ELF/riscv-tls-le.s | 7 +++ lld/test/ELF/tls.s | 20 +++++++ lld/test/ELF/x86-64-reloc-tpoff32-fpic.s | 14 ----- 12 files changed, 94 insertions(+), 77 deletions(-) delete mode 100644 lld/test/ELF/Inputs/i386-static-tls-model4.s delete mode 100644 lld/test/ELF/x86-64-reloc-tpoff32-fpic.s diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index a9f627a080570..875ecf78ca2c7 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1400,10 +1400,17 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, in.got->hasGotOffRel = true; } - // Process some TLS relocations, including relaxing TLS relocations. - // Note that this function does not handle all TLS relocations. - if (unsigned processed = - handleTlsRelocation(type, sym, sec, offset, addend, expr)) { + // Process TLS relocations, including relaxing TLS relocations. Note that + // R_TPREL and R_TPREL_NEG relocations are resolved in processRelocAux. + if (expr == R_TPREL || expr == R_TPREL_NEG) { + if (config->shared) { + errorOrWarn("relocation " + toString(type) + " against " + toString(sym) + + " cannot be used with -shared" + + getLocation(sec, sym, offset)); + return; + } + } else if (unsigned processed = handleTlsRelocation( + type, sym, sec, offset, addend, expr)) { i += (processed - 1); return; } diff --git a/lld/test/ELF/Inputs/i386-static-tls-model4.s b/lld/test/ELF/Inputs/i386-static-tls-model4.s deleted file mode 100644 index 6006518bfd7ce..0000000000000 --- a/lld/test/ELF/Inputs/i386-static-tls-model4.s +++ /dev/null @@ -1,9 +0,0 @@ -.section ".tdata", "awT", @progbits -.globl var -var: - -.section .foo, "aw" -.global _start -_start: - movl %gs:0, %eax - leal var@ntpoff(%eax), %eax # R_386_TLS_LE diff --git a/lld/test/ELF/aarch64-tls-le.s b/lld/test/ELF/aarch64-tls-le.s index e63a379fcc45e..c43345e8e254f 100644 --- a/lld/test/ELF/aarch64-tls-le.s +++ b/lld/test/ELF/aarch64-tls-le.s @@ -8,6 +8,14 @@ #RELOC: Relocations [ #RELOC-NEXT: ] +## Reject local-exec TLS relocations for -shared. +# RUN: not ld.lld -shared %tmain.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: + +# ERR: error: relocation R_AARCH64_TLSLE_ADD_TPREL_HI12 against v1 cannot be used with -shared +# ERR: error: relocation R_AARCH64_TLSLE_ADD_TPREL_LO12_NC against v1 cannot be used with -shared +# ERR: error: relocation R_AARCH64_TLSLE_ADD_TPREL_HI12 against v2 cannot be used with -shared +# ERR: error: relocation R_AARCH64_TLSLE_ADD_TPREL_LO12_NC against v2 cannot be used with -shared + .globl _start _start: mrs x0, TPIDR_EL0 diff --git a/lld/test/ELF/arm-tls-le32.s b/lld/test/ELF/arm-tls-le32.s index 739752209db44..49469c2b6bfa6 100644 --- a/lld/test/ELF/arm-tls-le32.s +++ b/lld/test/ELF/arm-tls-le32.s @@ -8,6 +8,13 @@ /// statically for an application. The code sequences assume a thread pointer /// in r9 +/// Reject local-exec TLS relocations for -shared. 
+// RUN: not ld.lld -shared %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: + +// ERR: error: relocation R_ARM_TLS_LE32 against x cannot be used with -shared +// ERR: error: relocation R_ARM_TLS_LE32 against y cannot be used with -shared +// ERR: error: relocation R_ARM_TLS_LE32 against z cannot be used with -shared + .text .syntax unified .globl _start diff --git a/lld/test/ELF/i386-static-tls-model.s b/lld/test/ELF/i386-static-tls-model.s index cfd7cf6ba9722..dfbbed25ef228 100644 --- a/lld/test/ELF/i386-static-tls-model.s +++ b/lld/test/ELF/i386-static-tls-model.s @@ -1,9 +1,5 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %S/Inputs/i386-static-tls-model1.s -o %t.o -# RUN: ld.lld %t.o -o %t1 -shared -# RUN: llvm-readobj --dynamic-table %t1 | FileCheck %s - # RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %S/Inputs/i386-static-tls-model2.s -o %t.o # RUN: ld.lld %t.o -o %t2 -shared # RUN: llvm-readobj --dynamic-table %t2 | FileCheck %s @@ -12,9 +8,5 @@ # RUN: ld.lld %t.o -o %t3 -shared # RUN: llvm-readobj --dynamic-table %t3 | FileCheck %s -# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %S/Inputs/i386-static-tls-model4.s -o %t.o -# RUN: ld.lld %t.o -o %t4 -shared -# RUN: llvm-readobj --dynamic-table %t4 | FileCheck %s - # CHECK: DynamicSection [ # CHECK: FLAGS STATIC_TLS diff --git a/lld/test/ELF/i386-tls-le.s b/lld/test/ELF/i386-tls-le.s index b94ffbb67f07e..bcb4a8e21abca 100644 --- a/lld/test/ELF/i386-tls-le.s +++ b/lld/test/ELF/i386-tls-le.s @@ -1,11 +1,19 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=i686 %s -o %t.o # RUN: ld.lld %t.o -o %t -# RUN: ld.lld %t.o -shared -o %t.so +# RUN: ld.lld %t.o -pie -o %t.pie # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=DIS # RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC -# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s --check-prefix=DISSHARED -# RUN: llvm-readobj -r %t.so | FileCheck %s --check-prefix=RELOCSHARED +# RUN: llvm-objdump -d --no-show-raw-insn %t.pie | FileCheck %s --check-prefix=DIS +# RUN: llvm-readobj -r %t.pie | FileCheck %s --check-prefix=RELOC + +## Reject local-exec TLS relocations for -shared. 
+# RUN: not ld.lld -shared %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: + +# ERR: error: relocation R_386_TLS_LE_32 against var cannot be used with -shared +# ERR: error: relocation R_386_TLS_LE_32 against var1 cannot be used with -shared +# ERR: error: relocation R_386_TLS_LE against var cannot be used with -shared +# ERR: error: relocation R_386_TLS_LE against var1 cannot be used with -shared .section ".tdata", "awT", @progbits .globl var @@ -33,39 +41,16 @@ _start: # DIS: Disassembly of section test: # DIS-EMPTY: # DIS-NEXT: <_start>: -# DIS-NEXT: 402134: movl $8, %edx -# DIS-NEXT: 402139: movl %gs:0, %ecx -# DIS-NEXT: 402140: subl %edx, %eax -# DIS-NEXT: 402142: movl $4, %edx -# DIS-NEXT: 402147: movl %gs:0, %ecx -# DIS-NEXT: 40214e: subl %edx, %eax -# DIS-NEXT: 402150: movl %gs:0, %ecx -# DIS-NEXT: 402157: leal -8(%ecx), %eax -# DIS-NEXT: 40215d: movl %gs:0, %ecx -# DIS-NEXT: 402164: leal 119(%ecx), %eax +# DIS-NEXT: movl $8, %edx +# DIS-NEXT: movl %gs:0, %ecx +# DIS-NEXT: subl %edx, %eax +# DIS-NEXT: movl $4, %edx +# DIS-NEXT: movl %gs:0, %ecx +# DIS-NEXT: subl %edx, %eax +# DIS-NEXT: movl %gs:0, %ecx +# DIS-NEXT: leal -8(%ecx), %eax +# DIS-NEXT: movl %gs:0, %ecx +# DIS-NEXT: leal 119(%ecx), %eax # RELOC: Relocations [ # RELOC-NEXT: ] - -# DISSHARED: Disassembly of section test: -# DISSHARED-EMPTY: -# DISSHARED-NEXT: <_start>: -# DISSHARED-NEXT: 2218: movl $0, %edx -# DISSHARED-NEXT: 221d: movl %gs:0, %ecx -# DISSHARED-NEXT: 2224: subl %edx, %eax -# DISSHARED-NEXT: 2226: movl $0, %edx -# DISSHARED-NEXT: 222b: movl %gs:0, %ecx -# DISSHARED-NEXT: 2232: subl %edx, %eax -# DISSHARED-NEXT: 2234: movl %gs:0, %ecx -# DISSHARED-NEXT: 223b: leal (%ecx), %eax -# DISSHARED-NEXT: 2241: movl %gs:0, %ecx -# DISSHARED-NEXT: 2248: leal 123(%ecx), %eax - -# RELOCSHARED: Relocations [ -# RELOCSHARED-NEXT: Section (5) .rel.dyn { -# RELOCSHARED-NEXT: 0x2219 R_386_TLS_TPOFF32 var -# RELOCSHARED-NEXT: 0x223D R_386_TLS_TPOFF var -# RELOCSHARED-NEXT: 0x2227 R_386_TLS_TPOFF32 var1 -# RELOCSHARED-NEXT: 0x224A R_386_TLS_TPOFF var1 -# RELOCSHARED-NEXT: } -# RELOCSHARED-NEXT: ] diff --git a/lld/test/ELF/i386-zrel-zrela.s b/lld/test/ELF/i386-zrel-zrela.s index 27ff3bcab9a53..5e73ca2035241 100644 --- a/lld/test/ELF/i386-zrel-zrela.s +++ b/lld/test/ELF/i386-zrel-zrela.s @@ -27,7 +27,7 @@ # REL-NEXT: } # REL: Hex dump of section '.data': -# REL-NEXT: 0x000042cc cc420000 2a000000 +# REL-NEXT: 0x000042d0 d0420000 2a000000 # RUN: ld.lld -shared -z rel -z rela %t.o -o %t2.so # RUN: llvm-readobj -d -r %t2.so | FileCheck --check-prefix=RELA %s @@ -41,9 +41,9 @@ # RELA-NEXT: PLTGOT {{.*}} # RELA-NEXT: PLTREL RELA # RELA: .rela.dyn { -# RELA-NEXT: R_386_RELATIVE - 0x42EC +# RELA-NEXT: R_386_RELATIVE - 0x42F0 # RELA-NEXT: R_386_GLOB_DAT func 0x0 -# RELA-NEXT: R_386_TLS_TPOFF tls 0x2A +# RELA-NEXT: R_386_TLS_TPOFF tls 0x0 # RELA-NEXT: R_386_32 _start 0x2A # RELA-NEXT: } # RELA-NEXT: .rela.plt { @@ -56,7 +56,7 @@ _start: movl func@GOT(%eax), %eax .section .text1,"awx" - movl %gs:tls@NTPOFF+42, %eax + movl tls@GOTNTPOFF(%eax), %eax .data .long .data diff --git a/lld/test/ELF/mips-tls-hilo.s b/lld/test/ELF/mips-tls-hilo.s index 4f1417d295258..92cf71be5ecc1 100644 --- a/lld/test/ELF/mips-tls-hilo.s +++ b/lld/test/ELF/mips-tls-hilo.s @@ -7,8 +7,10 @@ # RUN: llvm-objdump -d -t --no-show-raw-insn %t.exe | FileCheck --check-prefix=DIS %s # RUN: llvm-readobj -r -A %t.exe | FileCheck %s -# RUN: ld.lld %t.o -shared -o %t.so -# RUN: llvm-readobj -r -A %t.so | FileCheck -check-prefix=SO %s +# RUN: 
not ld.lld %t.o -shared -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: + +# ERR: error: relocation R_MIPS_TLS_TPREL_HI16 against loc0 cannot be used with -shared +# ERR: error: relocation R_MIPS_TLS_TPREL_LO16 against loc0 cannot be used with -shared # DIS: 00000000 l O .tdata 00000000 loc0 diff --git a/lld/test/ELF/ppc64-local-exec-tls.s b/lld/test/ELF/ppc64-local-exec-tls.s index f657d96ad1f14..51dcb1a7395a1 100644 --- a/lld/test/ELF/ppc64-local-exec-tls.s +++ b/lld/test/ELF/ppc64-local-exec-tls.s @@ -4,6 +4,18 @@ // RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s // RUN: llvm-objdump -d %t | FileCheck --check-prefix=Dis %s +// RUN: not ld.lld -shared %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR + +/// Reject local-exec TLS relocations for -shared. +// ERR: error: relocation R_PPC64_TPREL16_HA against a cannot be used with -shared +// ERR: error: relocation R_PPC64_TPREL16_LO against a cannot be used with -shared +// ERR: error: relocation R_PPC64_TPREL16 against b cannot be used with -shared +// ERR: error: relocation R_PPC64_TPREL16_HI against b cannot be used with -shared +// ERR: error: relocation R_PPC64_TPREL16_DS against b cannot be used with -shared +// ERR: error: relocation R_PPC64_TPREL16_LO_DS against b cannot be used with -shared +// ERR: error: relocation R_PPC64_TPREL16_HIGHESTA against b cannot be used with -shared +// ERR: error: relocation R_PPC64_TPREL16_HIGHERA against b cannot be used with -shared + .text .abiversion 2 .globl test_local_exec # -- Begin function test_local_exec diff --git a/lld/test/ELF/riscv-tls-le.s b/lld/test/ELF/riscv-tls-le.s index 860e6884c5dae..96a10e940218d 100644 --- a/lld/test/ELF/riscv-tls-le.s +++ b/lld/test/ELF/riscv-tls-le.s @@ -13,6 +13,13 @@ # RUN: ld.lld -pie %t.64.o -o %t.64 # RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s +# RUN: not ld.lld -shared %t.32.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: + +# ERR: error: relocation R_RISCV_TPREL_HI20 against .LANCHOR0 cannot be used with -shared +# ERR: error: relocation R_RISCV_TPREL_LO12_I against .LANCHOR0 cannot be used with -shared +# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared +# ERR: error: relocation R_RISCV_TPREL_LO12_S against a cannot be used with -shared + # NM: {{0*}}00000008 b .LANCHOR0 # NM: {{0*}}0000000c B a diff --git a/lld/test/ELF/tls.s b/lld/test/ELF/tls.s index 353a056ee20d8..567e44600ab05 100644 --- a/lld/test/ELF/tls.s +++ b/lld/test/ELF/tls.s @@ -4,6 +4,26 @@ // RUN: llvm-readobj -S -l --symbols %tout | FileCheck %s // RUN: llvm-objdump -d %tout | FileCheck %s --check-prefix=DIS +/// Reject local-exec TLS relocations for -shared, regardless of the preemptibility. 
+// RUN: not ld.lld -shared %t -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR +// RUN: not ld.lld -shared -Bsymbolic %t -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR + +// ERR: error: relocation R_X86_64_TPOFF32 against a cannot be used with -shared +// ERR-NEXT: defined in {{.*}} +// ERR-NEXT: referenced by {{.*}}:(.text+0x4) +// ERR-EMPTY: +// ERR-NEXT: error: relocation R_X86_64_TPOFF32 against b cannot be used with -shared +// ERR-NEXT: defined in {{.*}} +// ERR-NEXT: referenced by {{.*}}:(.text+0xC) +// ERR-EMPTY: +// ERR-NEXT: error: relocation R_X86_64_TPOFF32 against c cannot be used with -shared +// ERR-NEXT: defined in {{.*}} +// ERR-NEXT: referenced by {{.*}}:(.text+0x14) +// ERR-EMPTY: +// ERR-NEXT: error: relocation R_X86_64_TPOFF32 against d cannot be used with -shared +// ERR-NEXT: defined in {{.*}} +// ERR-NEXT: referenced by {{.*}}:(.text+0x1C) + .global _start _start: movl %fs:a@tpoff, %eax diff --git a/lld/test/ELF/x86-64-reloc-tpoff32-fpic.s b/lld/test/ELF/x86-64-reloc-tpoff32-fpic.s deleted file mode 100644 index edb04c1d4487b..0000000000000 --- a/lld/test/ELF/x86-64-reloc-tpoff32-fpic.s +++ /dev/null @@ -1,14 +0,0 @@ -# REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -# RUN: not ld.lld %t.o -shared -o /dev/null 2>&1 | FileCheck %s - -# CHECK: relocation R_X86_64_TPOFF32 cannot be used against symbol var; recompile with -fPIC -# CHECK: >>> defined in {{.*}}.o -# CHECK: >>> referenced by {{.*}}.o:(.tdata+0xC) - -.section ".tdata", "awT", @progbits -.globl var -var: - -movq %fs:0, %rax -leaq var@TPOFF(%rax),%rax From 9a93f95fce91fb4616cee0f307b564b253789282 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 21 Dec 2020 16:45:57 +0000 Subject: [PATCH 021/378] [clang] Fix expected errors in plugin attribute example b2ba6867eac10874bd279c739639bdb9e60c1996 was landed with updated error messages in the example file but not in the test file. --- clang/test/Frontend/plugin-attribute.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Frontend/plugin-attribute.cpp b/clang/test/Frontend/plugin-attribute.cpp index 969105927be52..f02932d56c687 100644 --- a/clang/test/Frontend/plugin-attribute.cpp +++ b/clang/test/Frontend/plugin-attribute.cpp @@ -18,5 +18,5 @@ int var1 __attribute__((example("otherstring"))) = 1; // expected-warning {{'exa class Example { void __attribute__((example)) fn3(); // expected-error {{'example' attribute only allowed at file scope}} }; -void fn4() __attribute__((example(123))) { } // expected-error {{'example's first argument should be a string literal}} -void fn5() __attribute__((example("a","b", 3, 4.0))) { } // expected-error {{'example' attribute only allowed at most three arguments}} +void fn4() __attribute__((example(123))) { } // expected-error {{first argument to the 'example' attribute must be a string literal}} +void fn5() __attribute__((example("a","b", 3, 4.0))) { } // expected-error {{'example' attribute only accepts at most three arguments}} From 26c8f9081b6b1ca9358ac2ca38e8e603fb6f7d64 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Thu, 17 Dec 2020 16:26:07 -0800 Subject: [PATCH 022/378] [mlir[[vector] Extend Transfer read/write ops to support tensor types. Transfer_ops can now work on both buffers and tensor. Right now, lowering of the tensor case is not supported yet. 
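For illustration, a rough sketch (not taken from the patch; shapes, maps and
function names are made up) contrasting the existing buffer form with the newly
accepted tensor form. Because tensors have value semantics, the tensor variant
of vector.transfer_write returns an updated tensor instead of writing in place;
as noted above, only parsing/verification of the tensor case works for now, not
lowering.

```
// Existing form: transfers on a memref (buffer).
func @buffer_form(%m: memref<8x16xf32>, %v: vector<4x8xf32>, %i: index, %pad: f32) {
  %r = vector.transfer_read %m[%i, %i], %pad
    {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
    : memref<8x16xf32>, vector<4x8xf32>
  vector.transfer_write %v, %m[%i, %i]
    {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
    : vector<4x8xf32>, memref<8x16xf32>
  return
}

// New form: the same transfers on a ranked tensor; the write yields a new tensor.
func @tensor_form(%t: tensor<8x16xf32>, %v: vector<4x8xf32>, %i: index, %pad: f32) -> tensor<8x16xf32> {
  %r = vector.transfer_read %t[%i, %i], %pad
    {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
    : tensor<8x16xf32>, vector<4x8xf32>
  %t2 = vector.transfer_write %v, %t[%i, %i]
    {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
    : vector<4x8xf32>, tensor<8x16xf32>
  return %t2 : tensor<8x16xf32>
}
```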
Differential Revision: https://reviews.llvm.org/D93500 --- mlir/include/mlir/Dialect/Vector/VectorOps.h | 2 +- mlir/include/mlir/Dialect/Vector/VectorOps.td | 65 +++++--- .../include/mlir/Dialect/Vector/VectorUtils.h | 4 +- .../mlir/Interfaces/VectorInterfaces.td | 28 ++-- .../LegalizeStandardForSPIRV.cpp | 16 +- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 46 +++--- .../VectorToROCDL/VectorToROCDL.cpp | 6 +- .../Conversion/VectorToSCF/VectorToSCF.cpp | 41 ++--- .../Dialect/Linalg/Transforms/Hoisting.cpp | 4 +- .../Linalg/Transforms/Vectorization.cpp | 4 +- mlir/lib/Dialect/Vector/VectorOps.cpp | 154 ++++++++++-------- .../Vector/VectorTransferOpTransforms.cpp | 4 +- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 47 +++--- mlir/lib/Dialect/Vector/VectorUtils.cpp | 12 +- mlir/test/Dialect/Vector/invalid.mlir | 12 +- mlir/test/Dialect/Vector/ops.mlir | 48 ++++++ 16 files changed, 304 insertions(+), 189 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index 95964665ced64..5540a56a4043c 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -126,7 +126,7 @@ namespace impl { /// Build the default minor identity map suitable for a vector transfer. This /// also handles the case memref<... x vector<...>> -> vector<...> in which the /// rank of the identity map must take the vector element type into account. -AffineMap getTransferMinorIdentityMap(MemRefType memRefType, +AffineMap getTransferMinorIdentityMap(ShapedType shapedType, VectorType vectorType); } // namespace impl } // end namespace vector diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index de77e3b034830..13aba2076ee93 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -1056,7 +1056,7 @@ def Vector_TransferReadOp : DeclareOpInterfaceMethods, DeclareOpInterfaceMethods ]>, - Arguments<(ins AnyMemRef:$memref, Variadic:$indices, + Arguments<(ins AnyShaped:$source, Variadic:$indices, AffineMapAttr:$permutation_map, AnyType:$padding, OptionalAttr:$masked)>, Results<(outs AnyVector:$vector)> { @@ -1065,15 +1065,16 @@ def Vector_TransferReadOp : let description = [{ The `vector.transfer_read` op performs a read from a slice within a - [MemRef](../LangRef.md#memref-type) supplied as its first operand - into a [vector](../LangRef.md#vector-type) of the same base elemental type. + [MemRef](../LangRef.md#memref-type) or a Ranked + [Tensor](../LangRef.md#tensor-type) supplied as its first operand into a + [vector](../LangRef.md#vector-type) of the same base elemental type. - A memref operand with vector element type, must have its vector element - type match a suffix (shape and element type) of the vector (e.g. + A memref/tensor operand with vector element type, must have its vector + element type match a suffix (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>). - The slice is further defined by a full-rank index within the MemRef, - supplied as the operands `2 .. 1 + rank(memref)`. + The slice is further defined by a full-rank index within the MemRef/Tensor, + supplied as the operands `2 .. 1 + rank(memref/tensor)`. 
The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the @@ -1084,8 +1085,9 @@ def Vector_TransferReadOp : The size of the slice is specified by the size of the vector, given as the return type. - An `ssa-value` of the same elemental type as the MemRef is provided as the - last operand to specify padding in the case of out-of-bounds accesses. + An `ssa-value` of the same elemental type as the MemRef/Tensor is provided + as the last operand to specify padding in the case of out-of-bounds + accesses. An optional boolean array attribute is provided to specify which dimensions of the transfer need masking. When a dimension is specified as not requiring @@ -1196,17 +1198,22 @@ def Vector_TransferReadOp : %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 {permutation_map = (d0, d1)->(d0, d1)} : memref>, vector<1x1x4x3xf32> + + // Read from a tensor with vector element type. + %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 + {permutation_map = (d0, d1)->(d0, d1)} + : tensor>, vector<1x1x4x3xf32> ``` }]; let builders = [ // Builder that sets padding to zero. - OpBuilderDAG<(ins "VectorType":$vector, "Value":$memref, + OpBuilderDAG<(ins "VectorType":$vector, "Value":$source, "ValueRange":$indices, "AffineMap":$permutationMap, CArg<"ArrayRef", "{}">:$maybeMasked)>, // Builder that sets permutation map (resp. padding) to // 'getMinorIdentityMap' (resp. zero). - OpBuilderDAG<(ins "VectorType":$vector, "Value":$memref, + OpBuilderDAG<(ins "VectorType":$vector, "Value":$source, "ValueRange":$indices, CArg<"ArrayRef", "{}">:$maybeMasked)> ]; @@ -1217,26 +1224,29 @@ def Vector_TransferWriteOp : Vector_Op<"transfer_write", [ DeclareOpInterfaceMethods, DeclareOpInterfaceMethods - ]>, - Arguments<(ins AnyVector:$vector, AnyMemRef:$memref, + ]>, + Arguments<(ins AnyVector:$vector, AnyShaped:$source, Variadic:$indices, AffineMapAttr:$permutation_map, - OptionalAttr:$masked)> { + OptionalAttr:$masked)>, + Results<(outs Optional:$result)> { let summary = "The vector.transfer_write op writes a supervector to memory."; let description = [{ The `vector.transfer_write` op performs a write from a [vector](../LangRef.md#vector-type), supplied as its first operand, into a - slice within a [MemRef](../LangRef.md#memref-type) of the same base - elemental type, supplied as its second operand. + slice within a [MemRef](../LangRef.md#memref-type) or a Ranked + [Tensor](../LangRef.md#tensor-type) of the same base elemental type, + supplied as its second operand. - A vector memref operand must have its vector element type match a suffix - (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>, - vector<1x1x4x3xf32>). + A vector memref/tensor operand must have its vector element type match a + suffix (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>, + vector<1x1x4x3xf32>). If the operand is a tensor, the operation returns a + new tensor of the same type. - The slice is further defined by a full-rank index within the MemRef, - supplied as the operands `3 .. 2 + rank(memref)`. + The slice is further defined by a full-rank index within the MemRef/Tensor, + supplied as the operands `3 .. 2 + rank(memref/tensor)`. 
The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the @@ -1280,15 +1290,24 @@ def Vector_TransferWriteOp : vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = (d0, d1)->(d0, d1)} : vector<1x1x4x3xf32>, memref> + + // return a tensor where the vector is inserted into the source tensor. + %5 = vector.transfer_write %4, %arg1[%c3, %c3] + {permutation_map = (d0, d1)->(d0, d1)} + : vector<1x1x4x3xf32>, tensor> ``` }]; let builders = [ // Builder that sets permutation map to 'getMinorIdentityMap'. - OpBuilderDAG<(ins "Value":$vector, "Value":$memref, "ValueRange":$indices, + OpBuilderDAG<(ins "Value":$vector, "Value":$source, "ValueRange":$indices, CArg<"ArrayRef", "{}">:$maybeMasked)>, - OpBuilderDAG<(ins "Value":$vector, "Value":$memref, "ValueRange":$indices, + OpBuilderDAG<(ins "Value":$vector, "Value":$source, "ValueRange":$indices, "AffineMap":$permutationMap)>, + OpBuilderDAG<(ins "Value":$vector, "Value":$source, "ValueRange":$indices, + "AffineMapAttr":$permutationMap, "ArrayAttr":$masked)>, + OpBuilderDAG<(ins "Value":$vector, "Value":$source, "ValueRange":$indices, + "AffineMap":$permutationMap, "ArrayAttr":$masked)>, ]; let hasFolder = 1; diff --git a/mlir/include/mlir/Dialect/Vector/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/VectorUtils.h index f70fba819b66d..a06bc8cf65622 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorUtils.h +++ b/mlir/include/mlir/Dialect/Vector/VectorUtils.h @@ -20,9 +20,9 @@ class AffineApplyOp; class AffineForOp; class AffineMap; class Location; -class MemRefType; class OpBuilder; class Operation; +class ShapedType; class Value; class VectorType; class VectorTransferOpInterface; @@ -157,7 +157,7 @@ makePermutationMap(Operation *op, ArrayRef indices, /// Build the default minor identity map suitable for a vector transfer. This /// also handles the case memref<... x vector<...>> -> vector<...> in which the /// rank of the identity map must take the vector element type into account. -AffineMap getTransferMinorIdentityMap(MemRefType memRefType, +AffineMap getTransferMinorIdentityMap(ShapedType shapedType, VectorType vectorType); /// Return true if we can prove that the transfer operations access disjoint diff --git a/mlir/include/mlir/Interfaces/VectorInterfaces.td b/mlir/include/mlir/Interfaces/VectorInterfaces.td index 73332afd8825e..3f60de5831c9f 100644 --- a/mlir/include/mlir/Interfaces/VectorInterfaces.td +++ b/mlir/include/mlir/Interfaces/VectorInterfaces.td @@ -47,7 +47,7 @@ def VectorUnrollOpInterface : OpInterface<"VectorUnrollOpInterface"> { def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { let description = [{ - Encodes properties of an operation on vectors that can be unrolled. + Encodes properties of a transfer read or write operation. 
}]; let cppNamespace = "::mlir"; @@ -83,11 +83,11 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { }] >, InterfaceMethod< - /*desc=*/"Return the memref operand.", + /*desc=*/"Return the memref or ranked tensor operand.", /*retTy=*/"Value", - /*methodName=*/"memref", + /*methodName=*/"source", /*args=*/(ins), - /*methodBody=*/"return $_op.memref();" + /*methodBody=*/"return $_op.source();" /*defaultImplementation=*/ >, InterfaceMethod< @@ -123,13 +123,13 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { /*defaultImplementation=*/ >, InterfaceMethod< - /*desc=*/"Return the MemRefType.", - /*retTy=*/"MemRefType", - /*methodName=*/"getMemRefType", + /*desc=*/"Return the ShapedType.", + /*retTy=*/"ShapedType", + /*methodName=*/"getShapedType", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/ - "return $_op.memref().getType().template cast();" + "return $_op.source().getType().template cast();" >, InterfaceMethod< /*desc=*/"Return the VectorType.", @@ -152,14 +152,14 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { "return $_op.permutation_map().getNumResults();" >, InterfaceMethod< - /*desc=*/[{ Return the number of leading memref dimensions that do not + /*desc=*/[{ Return the number of leading shaped dimensions that do not participate in the permutation map.}], /*retTy=*/"unsigned", - /*methodName=*/"getLeadingMemRefRank", + /*methodName=*/"getLeadingShapedRank", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/ - "return $_op.getMemRefType().getRank() - $_op.getTransferRank();" + "return $_op.getShapedType().getRank() - $_op.getTransferRank();" >, InterfaceMethod< /*desc=*/[{ Returns true if at least one of the dimensions is masked.}], @@ -178,8 +178,8 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { /*desc=*/[{ Helper function to account for the fact that `permutationMap` results and `op.indices` sizes may not match and may not be aligned. The first - `getLeadingMemRefRank()` indices may just be indexed and not transferred - from/into the vector. + `getLeadingShapedRank()` indices may just be indexed and not + transferred from/into the vector. For example: ``` vector.transfer %0[%i, %j, %k, %c0] : @@ -195,7 +195,7 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { /*methodBody=*/"", /*defaultImplementation=*/[{ for (int64_t resultIdx = 0, - indicesIdx = $_op.getLeadingMemRefRank(), + indicesIdx = $_op.getLeadingShapedRank(), eResult = $_op.getTransferRank(); resultIdx < eResult; ++resultIdx, ++indicesIdx) diff --git a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp index ea483aa6abaeb..10d727df701b7 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp @@ -22,6 +22,17 @@ using namespace mlir; +/// Helpers to access the memref operand for each op. +static Value getMemRefOperand(LoadOp op) { return op.memref(); } + +static Value getMemRefOperand(vector::TransferReadOp op) { return op.source(); } + +static Value getMemRefOperand(StoreOp op) { return op.memref(); } + +static Value getMemRefOperand(vector::TransferWriteOp op) { + return op.source(); +} + namespace { /// Merges subview operation with load/transferRead operation. 
template @@ -141,7 +152,7 @@ template LogicalResult LoadOpOfSubViewFolder::matchAndRewrite(OpTy loadOp, PatternRewriter &rewriter) const { - auto subViewOp = loadOp.memref().template getDefiningOp(); + auto subViewOp = getMemRefOperand(loadOp).template getDefiningOp(); if (!subViewOp) { return failure(); } @@ -162,7 +173,8 @@ template LogicalResult StoreOpOfSubViewFolder::matchAndRewrite(OpTy storeOp, PatternRewriter &rewriter) const { - auto subViewOp = storeOp.memref().template getDefiningOp(); + auto subViewOp = + getMemRefOperand(storeOp).template getDefiningOp(); if (!subViewOp) { return failure(); } diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index ebe07366f6eca..a982b90e0e93b 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -141,12 +141,10 @@ static Value buildVectorComparison(ConversionPatternRewriter &rewriter, return rewriter.create(loc, CmpIPredicate::slt, indices, bounds); } -// Helper that returns data layout alignment of an operation with memref. -template -LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter, T op, - unsigned &align) { - Type elementTy = - typeConverter.convertType(op.getMemRefType().getElementType()); +// Helper that returns data layout alignment of a memref. +LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter, + MemRefType memrefType, unsigned &align) { + Type elementTy = typeConverter.convertType(memrefType.getElementType()); if (!elementTy) return failure(); @@ -222,7 +220,8 @@ replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, TransferReadOp xferOp, ArrayRef operands, Value dataPtr) { unsigned align; - if (failed(getMemRefAlignment(typeConverter, xferOp, align))) + if (failed(getMemRefAlignment( + typeConverter, xferOp.getShapedType().cast(), align))) return failure(); rewriter.replaceOpWithNewOp(xferOp, dataPtr, align); return success(); @@ -243,7 +242,8 @@ replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter, return failure(); unsigned align; - if (failed(getMemRefAlignment(typeConverter, xferOp, align))) + if (failed(getMemRefAlignment( + typeConverter, xferOp.getShapedType().cast(), align))) return failure(); rewriter.replaceOpWithNewOp( @@ -258,7 +258,8 @@ replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, TransferWriteOp xferOp, ArrayRef operands, Value dataPtr) { unsigned align; - if (failed(getMemRefAlignment(typeConverter, xferOp, align))) + if (failed(getMemRefAlignment( + typeConverter, xferOp.getShapedType().cast(), align))) return failure(); auto adaptor = TransferWriteOpAdaptor(operands); rewriter.replaceOpWithNewOp(xferOp, adaptor.vector(), dataPtr, @@ -272,7 +273,8 @@ replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter, TransferWriteOp xferOp, ArrayRef operands, Value dataPtr, Value mask) { unsigned align; - if (failed(getMemRefAlignment(typeConverter, xferOp, align))) + if (failed(getMemRefAlignment( + typeConverter, xferOp.getShapedType().cast(), align))) return failure(); auto adaptor = TransferWriteOpAdaptor(operands); @@ -345,7 +347,8 @@ class VectorMaskedLoadOpConversion // Resolve alignment. 
unsigned align; - if (failed(getMemRefAlignment(*getTypeConverter(), load, align))) + if (failed(getMemRefAlignment(*getTypeConverter(), load.getMemRefType(), + align))) return failure(); auto vtype = typeConverter->convertType(load.getResultVectorType()); @@ -375,7 +378,8 @@ class VectorMaskedStoreOpConversion // Resolve alignment. unsigned align; - if (failed(getMemRefAlignment(*getTypeConverter(), store, align))) + if (failed(getMemRefAlignment(*getTypeConverter(), store.getMemRefType(), + align))) return failure(); auto vtype = typeConverter->convertType(store.getValueVectorType()); @@ -405,7 +409,8 @@ class VectorGatherOpConversion // Resolve alignment. unsigned align; - if (failed(getMemRefAlignment(*getTypeConverter(), gather, align))) + if (failed(getMemRefAlignment(*getTypeConverter(), gather.getMemRefType(), + align))) return failure(); // Get index ptrs. @@ -438,7 +443,8 @@ class VectorScatterOpConversion // Resolve alignment. unsigned align; - if (failed(getMemRefAlignment(*getTypeConverter(), scatter, align))) + if (failed(getMemRefAlignment(*getTypeConverter(), scatter.getMemRefType(), + align))) return failure(); // Get index ptrs. @@ -1182,8 +1188,11 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { xferOp.getVectorType().getRank(), xferOp->getContext())) return failure(); + auto memRefType = xferOp.getShapedType().template dyn_cast(); + if (!memRefType) + return failure(); // Only contiguous source tensors supported atm. - auto strides = computeContiguousStrides(xferOp.getMemRefType()); + auto strides = computeContiguousStrides(memRefType); if (!strides) return failure(); @@ -1192,10 +1201,9 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { }; Location loc = xferOp->getLoc(); - MemRefType memRefType = xferOp.getMemRefType(); if (auto memrefVectorElementType = - memRefType.getElementType().dyn_cast()) { + memRefType.getElementType().template dyn_cast()) { // Memref has vector element type. if (memrefVectorElementType.getElementType() != xferOp.getVectorType().getElementType()) @@ -1222,7 +1230,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // address space 0. // TODO: support alignment when possible. Value dataPtr = this->getStridedElementPtr( - loc, memRefType, adaptor.memref(), adaptor.indices(), rewriter); + loc, memRefType, adaptor.source(), adaptor.indices(), rewriter); auto vecTy = toLLVMTy(xferOp.getVectorType()).template cast(); Value vectorDataPtr; @@ -1248,7 +1256,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { unsigned vecWidth = vecTy.getVectorNumElements(); unsigned lastIndex = llvm::size(xferOp.indices()) - 1; Value off = xferOp.indices()[lastIndex]; - Value dim = rewriter.create(loc, xferOp.memref(), lastIndex); + Value dim = rewriter.create(loc, xferOp.source(), lastIndex); Value mask = buildVectorComparison( rewriter, xferOp, enableIndexOptimizations, vecWidth, dim, &off); diff --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp index e5474abfd3e37..973b116ef498c 100644 --- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp +++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp @@ -89,7 +89,9 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { return failure(); // Obtain dataPtr and elementType from the memref. 
- MemRefType memRefType = xferOp.getMemRefType(); + auto memRefType = xferOp.getShapedType().template dyn_cast(); + if (!memRefType) + return failure(); // MUBUF instruction operate only on addresspace 0(unified) or 1(global) // In case of 3(LDS): fall back to vector->llvm pass // In case of 5(VGPR): wrong @@ -101,7 +103,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // indices, so no need to calculate offset size in bytes again in // the MUBUF instruction. Value dataPtr = this->getStridedElementPtr( - loc, memRefType, adaptor.memref(), adaptor.indices(), rewriter); + loc, memRefType, adaptor.source(), adaptor.indices(), rewriter); // 1. Create and fill a <4 x i32> dwordConfig with: // 1st two elements holding the address of dataPtr. diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index c05e9da2c9498..b0f1b46b2459b 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -107,7 +107,7 @@ class NDTransferOpHelper { // TODO: when we go to k > 1-D vectors adapt minorRank. minorRank = 1; majorRank = vectorType.getRank() - minorRank; - leadingRank = xferOp.getLeadingMemRefRank(); + leadingRank = xferOp.getLeadingShapedRank(); majorVectorType = VectorType::get(vectorType.getShape().take_front(majorRank), vectorType.getElementType()); @@ -115,9 +115,9 @@ class NDTransferOpHelper { VectorType::get(vectorType.getShape().take_back(minorRank), vectorType.getElementType()); /// Memref of minor vector type is used for individual transfers. - memRefMinorVectorType = - MemRefType::get(majorVectorType.getShape(), minorVectorType, {}, - xferOp.getMemRefType().getMemorySpace()); + memRefMinorVectorType = MemRefType::get( + majorVectorType.getShape(), minorVectorType, {}, + xferOp.getShapedType().template cast().getMemorySpace()); } LogicalResult doReplace(); @@ -155,7 +155,7 @@ void NDTransferOpHelper::emitLoops( const MemRefBoundsCapture &)> loopBodyBuilder) { /// Loop nest operates on the major dimensions - MemRefBoundsCapture memrefBoundsCapture(xferOp.memref()); + MemRefBoundsCapture memrefBoundsCapture(xferOp.source()); if (options.unroll) { auto shape = majorVectorType.getShape(); @@ -272,9 +272,9 @@ LogicalResult NDTransferOpHelper::doReplace() { indexing.append(leadingOffsets.begin(), leadingOffsets.end()); indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); indexing.append(minorOffsets.begin(), minorOffsets.end()); - Value memref = xferOp.memref(); + Value memref = xferOp.source(); auto map = - getTransferMinorIdentityMap(xferOp.getMemRefType(), minorVectorType); + getTransferMinorIdentityMap(xferOp.getShapedType(), minorVectorType); ArrayAttr masked; if (!xferOp.isMaskedDim(xferOp.getVectorType().getRank() - 1)) { OpBuilder &b = ScopedContext::getBuilderRef(); @@ -379,13 +379,13 @@ LogicalResult NDTransferOpHelper::doReplace() { else result = std_load(alloc, majorIvs); auto map = - getTransferMinorIdentityMap(xferOp.getMemRefType(), minorVectorType); + getTransferMinorIdentityMap(xferOp.getShapedType(), minorVectorType); ArrayAttr masked; if (!xferOp.isMaskedDim(xferOp.getVectorType().getRank() - 1)) { OpBuilder &b = ScopedContext::getBuilderRef(); masked = b.getBoolArrayAttr({false}); } - vector_transfer_write(result, xferOp.memref(), indexing, + vector_transfer_write(result, xferOp.source(), indexing, AffineMapAttr::get(map), masked); }; @@ -422,7 +422,7 @@ template static int computeCoalescedIndex(TransferOpTy transfer) { // rank 
of the remote memory access, coalescing behavior occurs on the // innermost memory dimension. - auto remoteRank = transfer.getMemRefType().getRank(); + auto remoteRank = transfer.getShapedType().getRank(); // Iterate over the results expressions of the permutation map to determine // the loop order for creating pointwise copies between remote and local // memories. @@ -536,13 +536,14 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace mlir::edsc::op; TransferReadOp transfer = cast(op); - + auto memRefType = transfer.getShapedType().dyn_cast(); + if (!memRefType) + return failure(); // Fall back to a loop if the fastest varying stride is not 1 or it is // permuted. int64_t offset; SmallVector strides; - auto successStrides = - getStridesAndOffset(transfer.getMemRefType(), strides, offset); + auto successStrides = getStridesAndOffset(memRefType, strides, offset); if (succeeded(successStrides) && strides.back() == 1 && transfer.permutation_map().isMinorIdentity()) { // If > 1D, emit a bunch of loops around 1-D vector transfers. @@ -557,8 +558,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( // Conservative lowering to scalar load / stores. // 1. Setup all the captures. ScopedContext scope(rewriter, transfer.getLoc()); - StdIndexedValue remote(transfer.memref()); - MemRefBoundsCapture memRefBoundsCapture(transfer.memref()); + StdIndexedValue remote(transfer.source()); + MemRefBoundsCapture memRefBoundsCapture(transfer.source()); VectorBoundsCapture vectorBoundsCapture(transfer.vector()); int coalescedIdx = computeCoalescedIndex(transfer); // Swap the vectorBoundsCapture which will reorder loop bounds. @@ -621,13 +622,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace edsc::op; TransferWriteOp transfer = cast(op); + auto memRefType = transfer.getShapedType().template dyn_cast(); + if (!memRefType) + return failure(); // Fall back to a loop if the fastest varying stride is not 1 or it is // permuted. int64_t offset; SmallVector strides; - auto successStrides = - getStridesAndOffset(transfer.getMemRefType(), strides, offset); + auto successStrides = getStridesAndOffset(memRefType, strides, offset); if (succeeded(successStrides) && strides.back() == 1 && transfer.permutation_map().isMinorIdentity()) { // If > 1D, emit a bunch of loops around 1-D vector transfers. @@ -641,8 +644,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( // 1. Setup all the captures. 
ScopedContext scope(rewriter, transfer.getLoc()); - StdIndexedValue remote(transfer.memref()); - MemRefBoundsCapture memRefBoundsCapture(transfer.memref()); + StdIndexedValue remote(transfer.source()); + MemRefBoundsCapture memRefBoundsCapture(transfer.source()); Value vectorValue(transfer.vector()); VectorBoundsCapture vectorBoundsCapture(transfer.vector()); int coalescedIdx = computeCoalescedIndex(transfer); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 9e7e7efdd1361..a1797fde7da6b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -111,7 +111,7 @@ void mlir::linalg::hoistRedundantVectorTransfers(FuncOp func) { vector::TransferWriteOp transferWrite; for (auto *sliceOp : llvm::reverse(forwardSlice)) { auto candidateWrite = dyn_cast(sliceOp); - if (!candidateWrite || candidateWrite.memref() != transferRead.memref()) + if (!candidateWrite || candidateWrite.source() != transferRead.source()) continue; transferWrite = candidateWrite; } @@ -142,7 +142,7 @@ void mlir::linalg::hoistRedundantVectorTransfers(FuncOp func) { DominanceInfo dom(loop); if (!dom.properlyDominates(transferRead.getOperation(), transferWrite)) return WalkResult::advance(); - for (auto &use : transferRead.memref().getUses()) { + for (auto &use : transferRead.source().getUses()) { if (!dom.properlyDominates(loop, use.getOwner())) continue; if (use.getOwner() == transferRead.getOperation() || diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 2df1a9469eabc..7165ee775e9c6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -411,7 +411,7 @@ LogicalResult LinalgCopyVTRForwardingPattern::matchAndRewrite( vector::TransferReadOp xferOp, PatternRewriter &rewriter) const { // Transfer into `view`. - Value viewOrAlloc = xferOp.memref(); + Value viewOrAlloc = xferOp.source(); if (!viewOrAlloc.getDefiningOp() && !viewOrAlloc.getDefiningOp()) return failure(); @@ -487,7 +487,7 @@ LogicalResult LinalgCopyVTRForwardingPattern::matchAndRewrite( LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite( vector::TransferWriteOp xferOp, PatternRewriter &rewriter) const { // Transfer into `viewOrAlloc`. - Value viewOrAlloc = xferOp.memref(); + Value viewOrAlloc = xferOp.source(); if (!viewOrAlloc.getDefiningOp() && !viewOrAlloc.getDefiningOp()) return failure(); diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 5c1f377e589e3..a3ad355d30b26 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -1890,41 +1890,43 @@ static LogicalResult verifyPermutationMap(AffineMap permutationMap, return success(); } -static LogicalResult verifyTransferOp(Operation *op, MemRefType memrefType, +static LogicalResult verifyTransferOp(Operation *op, ShapedType shapedType, VectorType vectorType, AffineMap permutationMap, ArrayAttr optionalMasked) { - auto memrefElementType = memrefType.getElementType(); - if (auto memrefVectorElementType = memrefElementType.dyn_cast()) { - // Memref has vector element type. 
- - unsigned memrefVecSize = memrefVectorElementType.getElementTypeBitWidth() * - memrefVectorElementType.getShape().back(); + if (!shapedType.isa()) + return op->emitOpError( + "requires source to be a memref or ranked tensor type"); + auto elementType = shapedType.getElementType(); + if (auto vectorElementType = elementType.dyn_cast()) { + // Memref or tensor has vector element type. + unsigned sourceVecSize = vectorElementType.getElementTypeBitWidth() * + vectorElementType.getShape().back(); unsigned resultVecSize = vectorType.getElementTypeBitWidth() * vectorType.getShape().back(); - if (resultVecSize % memrefVecSize != 0) + if (resultVecSize % sourceVecSize != 0) return op->emitOpError( "requires the bitwidth of the minor 1-D vector to be an integral " - "multiple of the bitwidth of the minor 1-D vector of the memref"); + "multiple of the bitwidth of the minor 1-D vector of the source"); - unsigned memrefVecEltRank = memrefVectorElementType.getRank(); + unsigned sourceVecEltRank = vectorElementType.getRank(); unsigned resultVecRank = vectorType.getRank(); - if (memrefVecEltRank > resultVecRank) + if (sourceVecEltRank > resultVecRank) return op->emitOpError( - "requires memref vector element and vector result ranks to match."); - unsigned rankOffset = resultVecRank - memrefVecEltRank; + "requires source vector element and vector result ranks to match."); + unsigned rankOffset = resultVecRank - sourceVecEltRank; // Check that permutation map results match 'rankOffset' of vector type. if (permutationMap.getNumResults() != rankOffset) return op->emitOpError("requires a permutation_map with result dims of " "the same rank as the vector type"); } else { - // Memref has scalar element type. + // Memref or tensor has scalar element type. unsigned resultVecSize = vectorType.getElementTypeBitWidth() * vectorType.getShape().back(); - if (resultVecSize % memrefElementType.getIntOrFloatBitWidth() != 0) + if (resultVecSize % elementType.getIntOrFloatBitWidth() != 0) return op->emitOpError( "requires the bitwidth of the minor 1-D vector to be an integral " - "multiple of the bitwidth of the memref element type"); + "multiple of the bitwidth of the source element type"); // Check that permutation map results match rank of vector type. 
if (permutationMap.getNumResults() != vectorType.getRank()) @@ -1934,9 +1936,9 @@ static LogicalResult verifyTransferOp(Operation *op, MemRefType memrefType, if (permutationMap.getNumSymbols() != 0) return op->emitOpError("requires permutation_map without symbols"); - if (permutationMap.getNumInputs() != memrefType.getRank()) + if (permutationMap.getNumInputs() != shapedType.getRank()) return op->emitOpError("requires a permutation_map with input dims of the " - "same rank as the memref type"); + "same rank as the source type"); if (optionalMasked) { if (permutationMap.getNumResults() != @@ -1978,7 +1980,7 @@ void TransferReadOp::build(OpBuilder &builder, OperationState &result, static void printTransferAttrs(OpAsmPrinter &p, VectorTransferOpInterface op) { SmallVector elidedAttrs; if (op.permutation_map() == - getTransferMinorIdentityMap(op.getMemRefType(), op.getVectorType())) + getTransferMinorIdentityMap(op.getShapedType(), op.getVectorType())) elidedAttrs.push_back(op.getPermutationMapAttrName()); bool elideMasked = true; if (auto maybeMasked = op.masked()) { @@ -1995,21 +1997,21 @@ static void printTransferAttrs(OpAsmPrinter &p, VectorTransferOpInterface op) { } static void print(OpAsmPrinter &p, TransferReadOp op) { - p << op.getOperationName() << " " << op.memref() << "[" << op.indices() + p << op.getOperationName() << " " << op.source() << "[" << op.indices() << "], " << op.padding(); printTransferAttrs(p, cast(op.getOperation())); - p << " : " << op.getMemRefType() << ", " << op.getVectorType(); + p << " : " << op.getShapedType() << ", " << op.getVectorType(); } static ParseResult parseTransferReadOp(OpAsmParser &parser, OperationState &result) { llvm::SMLoc typesLoc; - OpAsmParser::OperandType memrefInfo; + OpAsmParser::OperandType sourceInfo; SmallVector indexInfo; OpAsmParser::OperandType paddingInfo; SmallVector types; // Parsing with support for paddingValue. 
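A note on the permutation map handling touched here: when the attribute is omitted, the parser falls back to the minor identity map computed from the source's ShapedType (previously the MemRefType), and the printer above elides the attribute again whenever it matches that default. A small sketch, with illustrative shapes, of the equivalence for a rank-2 source and a 1-D result vector:

```mlir
// Both reads parse to the same operation; the explicit map is exactly the
// minor identity, (d0, d1) -> (d1), that the default falls back to.
func @default_permutation_map(%src: tensor<?x?xf32>) {
  %c0 = constant 0 : index
  %pad = constant 0.0 : f32
  %a = vector.transfer_read %src[%c0, %c0], %pad
         : tensor<?x?xf32>, vector<128xf32>
  %b = vector.transfer_read %src[%c0, %c0], %pad
         {permutation_map = affine_map<(d0, d1) -> (d1)>}
         : tensor<?x?xf32>, vector<128xf32>
  return
}
```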
- if (parser.parseOperand(memrefInfo) || + if (parser.parseOperand(sourceInfo) || parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || parser.parseComma() || parser.parseOperand(paddingInfo) || parser.parseOptionalAttrDict(result.attributes) || @@ -2018,48 +2020,48 @@ static ParseResult parseTransferReadOp(OpAsmParser &parser, if (types.size() != 2) return parser.emitError(typesLoc, "requires two types"); auto indexType = parser.getBuilder().getIndexType(); - MemRefType memRefType = types[0].dyn_cast(); - if (!memRefType) - return parser.emitError(typesLoc, "requires memref type"); + auto shapedType = types[0].dyn_cast(); + if (!shapedType || !shapedType.isa()) + return parser.emitError(typesLoc, "requires memref or ranked tensor type"); VectorType vectorType = types[1].dyn_cast(); if (!vectorType) return parser.emitError(typesLoc, "requires vector type"); auto permutationAttrName = TransferReadOp::getPermutationMapAttrName(); auto attr = result.attributes.get(permutationAttrName); if (!attr) { - auto permMap = getTransferMinorIdentityMap(memRefType, vectorType); + auto permMap = getTransferMinorIdentityMap(shapedType, vectorType); result.attributes.set(permutationAttrName, AffineMapAttr::get(permMap)); } return failure( - parser.resolveOperand(memrefInfo, memRefType, result.operands) || + parser.resolveOperand(sourceInfo, shapedType, result.operands) || parser.resolveOperands(indexInfo, indexType, result.operands) || - parser.resolveOperand(paddingInfo, memRefType.getElementType(), + parser.resolveOperand(paddingInfo, shapedType.getElementType(), result.operands) || parser.addTypeToList(vectorType, result.types)); } static LogicalResult verify(TransferReadOp op) { - // Consistency of elemental types in memref and vector. - MemRefType memrefType = op.getMemRefType(); + // Consistency of elemental types in source and vector. + ShapedType shapedType = op.getShapedType(); VectorType vectorType = op.getVectorType(); auto paddingType = op.padding().getType(); auto permutationMap = op.permutation_map(); - auto memrefElementType = memrefType.getElementType(); + auto sourceElementType = shapedType.getElementType(); - if (static_cast(op.indices().size()) != memrefType.getRank()) - return op.emitOpError("requires ") << memrefType.getRank() << " indices"; + if (static_cast(op.indices().size()) != shapedType.getRank()) + return op.emitOpError("requires ") << shapedType.getRank() << " indices"; - if (failed(verifyTransferOp(op.getOperation(), memrefType, vectorType, + if (failed(verifyTransferOp(op.getOperation(), shapedType, vectorType, permutationMap, op.masked() ? *op.masked() : ArrayAttr()))) return failure(); - if (auto memrefVectorElementType = memrefElementType.dyn_cast()) { - // Memref has vector element type. - // Check that 'memrefVectorElementType' and 'paddingType' types match. - if (memrefVectorElementType != paddingType) + if (auto sourceVectorElementType = sourceElementType.dyn_cast()) { + // Source has vector element type. + // Check that 'sourceVectorElementType' and 'paddingType' types match. + if (sourceVectorElementType != paddingType) return op.emitOpError( - "requires memref element type and padding type to match."); + "requires source element type and padding type to match."); } else { // Check that 'paddingType' is valid to store in a vector type. @@ -2067,9 +2069,9 @@ static LogicalResult verify(TransferReadOp op) { return op.emitOpError("requires valid padding vector elemental type"); // Check that padding type and vector element types match. 
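Looking ahead to the TransferWriteOp builder and parser changes just below: when the source is a ranked tensor, the write produces a new tensor result instead of mutating memory in place. A minimal sketch mirroring the new ops.mlir tests added at the end of this patch; shapes and names are illustrative:

```mlir
// transfer_write on a tensor source yields an updated tensor value.
func @transfer_write_on_tensor(%t: tensor<8x16xf32>, %v: vector<16xf32>) -> tensor<8x16xf32> {
  %c0 = constant 0 : index
  %t2 = vector.transfer_write %v, %t[%c0, %c0] : vector<16xf32>, tensor<8x16xf32>
  return %t2 : tensor<8x16xf32>
}
```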
- if (paddingType != memrefElementType) + if (paddingType != sourceElementType) return op.emitOpError( - "requires formal padding and memref of the same elemental type"); + "requires formal padding and source of the same elemental type"); } return verifyPermutationMap(permutationMap, @@ -2096,18 +2098,18 @@ static LogicalResult foldMemRefCast(Operation *op) { template static bool isInBounds(TransferOp op, int64_t resultIdx, int64_t indicesIdx) { // TODO: support more aggressive createOrFold on: - // `op.indices()[indicesIdx] + vectorType < dim(op.memref(), indicesIdx)` - if (op.getMemRefType().isDynamicDim(indicesIdx)) + // `op.indices()[indicesIdx] + vectorType < dim(op.source(), indicesIdx)` + if (op.getShapedType().isDynamicDim(indicesIdx)) return false; Value index = op.indices()[indicesIdx]; auto cstOp = index.getDefiningOp(); if (!cstOp) return false; - int64_t memrefSize = op.getMemRefType().getDimSize(indicesIdx); + int64_t sourceSize = op.getShapedType().getDimSize(indicesIdx); int64_t vectorSize = op.getVectorType().getDimSize(resultIdx); - return cstOp.getValue() + vectorSize <= memrefSize; + return cstOp.getValue() + vectorSize <= sourceSize; } template @@ -2159,33 +2161,51 @@ Optional> TransferReadOp::getShapeForUnroll() { /// Builder that sets permutation map to 'getMinorIdentityMap'. void TransferWriteOp::build(OpBuilder &builder, OperationState &result, - Value vector, Value memref, ValueRange indices, + Value vector, Value source, ValueRange indices, ArrayRef maybeMasked) { auto vectorType = vector.getType().cast(); auto permMap = getTransferMinorIdentityMap( - memref.getType().cast(), vectorType); + source.getType().cast(), vectorType); if (maybeMasked.empty()) - return build(builder, result, vector, memref, indices, permMap, + return build(builder, result, vector, source, indices, permMap, ArrayAttr()); ArrayAttr maskedArrayAttr = builder.getBoolArrayAttr(maybeMasked); - build(builder, result, vector, memref, indices, permMap, maskedArrayAttr); + build(builder, result, vector, source, indices, permMap, maskedArrayAttr); } void TransferWriteOp::build(OpBuilder &builder, OperationState &result, - Value vector, Value memref, ValueRange indices, + Value vector, Value source, ValueRange indices, AffineMap permutationMap) { - build(builder, result, vector, memref, indices, permutationMap, + build(builder, result, vector, source, indices, permutationMap, /*maybeMasked=*/ArrayAttr()); } +void TransferWriteOp::build(OpBuilder &builder, OperationState &result, + Value vector, Value source, ValueRange indices, + AffineMapAttr permutationMap, + /*optional*/ ArrayAttr masked) { + Type resultType = source.getType().dyn_cast(); + build(builder, result, resultType, vector, source, indices, permutationMap, + masked); +} + +void TransferWriteOp::build(OpBuilder &builder, OperationState &result, + Value vector, Value source, ValueRange indices, + AffineMap permutationMap, + /*optional*/ ArrayAttr masked) { + Type resultType = source.getType().dyn_cast(); + build(builder, result, resultType, vector, source, indices, permutationMap, + masked); +} + static ParseResult parseTransferWriteOp(OpAsmParser &parser, OperationState &result) { llvm::SMLoc typesLoc; - OpAsmParser::OperandType vectorInfo, memrefInfo; + OpAsmParser::OperandType vectorInfo, sourceInfo; SmallVector indexInfo; SmallVector types; if (parser.parseOperand(vectorInfo) || parser.parseComma() || - parser.parseOperand(memrefInfo) || + parser.parseOperand(sourceInfo) || parser.parseOperandList(indexInfo, 
OpAsmParser::Delimiter::Square) || parser.parseOptionalAttrDict(result.attributes) || parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types)) @@ -2196,38 +2216,40 @@ static ParseResult parseTransferWriteOp(OpAsmParser &parser, VectorType vectorType = types[0].dyn_cast(); if (!vectorType) return parser.emitError(typesLoc, "requires vector type"); - MemRefType memRefType = types[1].dyn_cast(); - if (!memRefType) - return parser.emitError(typesLoc, "requires memref type"); + ShapedType shapedType = types[1].dyn_cast(); + if (!shapedType || !shapedType.isa()) + return parser.emitError(typesLoc, "requires memref or ranked tensor type"); auto permutationAttrName = TransferWriteOp::getPermutationMapAttrName(); auto attr = result.attributes.get(permutationAttrName); if (!attr) { - auto permMap = getTransferMinorIdentityMap(memRefType, vectorType); + auto permMap = getTransferMinorIdentityMap(shapedType, vectorType); result.attributes.set(permutationAttrName, AffineMapAttr::get(permMap)); } return failure( parser.resolveOperand(vectorInfo, vectorType, result.operands) || - parser.resolveOperand(memrefInfo, memRefType, result.operands) || - parser.resolveOperands(indexInfo, indexType, result.operands)); + parser.resolveOperand(sourceInfo, shapedType, result.operands) || + parser.resolveOperands(indexInfo, indexType, result.operands) || + (shapedType.isa() && + parser.addTypeToList(shapedType, result.types))); } static void print(OpAsmPrinter &p, TransferWriteOp op) { - p << op.getOperationName() << " " << op.vector() << ", " << op.memref() << "[" + p << op.getOperationName() << " " << op.vector() << ", " << op.source() << "[" << op.indices() << "]"; printTransferAttrs(p, cast(op.getOperation())); - p << " : " << op.getVectorType() << ", " << op.getMemRefType(); + p << " : " << op.getVectorType() << ", " << op.getShapedType(); } static LogicalResult verify(TransferWriteOp op) { // Consistency of elemental types in memref and vector. - MemRefType memrefType = op.getMemRefType(); + ShapedType shapedType = op.getShapedType(); VectorType vectorType = op.getVectorType(); auto permutationMap = op.permutation_map(); - if (llvm::size(op.indices()) != memrefType.getRank()) - return op.emitOpError("requires ") << memrefType.getRank() << " indices"; + if (llvm::size(op.indices()) != shapedType.getRank()) + return op.emitOpError("requires ") << shapedType.getRank() << " indices"; - if (failed(verifyTransferOp(op.getOperation(), memrefType, vectorType, + if (failed(verifyTransferOp(op.getOperation(), shapedType, vectorType, permutationMap, op.masked() ? 
*op.masked() : ArrayAttr()))) return failure(); diff --git a/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp index b7de983dd3b1c..ea1189d53b311 100644 --- a/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp @@ -94,7 +94,7 @@ void TransferOptimization::deadStoreOp(vector::TransferWriteOp write) { << "\n"); llvm::SmallVector reads; Operation *firstOverwriteCandidate = nullptr; - for (auto *user : write.memref().getUsers()) { + for (auto *user : write.source().getUsers()) { if (user == write.getOperation()) continue; if (auto nextWrite = dyn_cast(user)) { @@ -163,7 +163,7 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) { << "\n"); SmallVector blockingWrites; vector::TransferWriteOp lastwrite = nullptr; - for (Operation *user : read.memref().getUsers()) { + for (Operation *user : read.source().getUsers()) { if (isa(user)) continue; if (auto write = dyn_cast(user)) { diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 664426960beb3..1e58a759d305a 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -597,7 +597,7 @@ static Value unrollTransferReadOp(vector::TransferReadOp readOp, Location loc = readOp.getLoc(); auto memrefElementType = - readOp.memref().getType().cast().getElementType(); + readOp.source().getType().cast().getElementType(); auto tupleType = generateExtractSlicesOpResultType( sourceVectorType, targetShape, strides, builder); int64_t numSlices = tupleType.size(); @@ -612,7 +612,7 @@ static Value unrollTransferReadOp(vector::TransferReadOp readOp, // `masked` attribute propagates conservatively: if the coarse op didn't // need masking, the fine op doesn't either. vectorTupleValues[index] = builder.create( - loc, sliceVectorType, readOp.memref(), sliceIndices, + loc, sliceVectorType, readOp.source(), sliceIndices, readOp.permutation_map(), readOp.padding(), readOp.masked() ? *readOp.masked() : ArrayAttr()); }; @@ -644,14 +644,14 @@ mlir::vector::unrollTransferWriteOp(OpBuilder &builder, Operation *op, Value tuple = builder.create( loc, tupleType, writeOp.vector(), targetShape, strides); auto memrefElementType = - writeOp.memref().getType().cast().getElementType(); + writeOp.source().getType().cast().getElementType(); SmallVector indices(writeOp.indices().begin(), writeOp.indices().end()); auto createSlice = [&](unsigned index, ArrayRef sliceIndices) { auto element = builder.create( loc, tupleType.getType(index), tuple, builder.getI64IntegerAttr(index)); builder.create( - loc, element.getResult(), writeOp.memref(), sliceIndices, + loc, element.getResult(), writeOp.source(), sliceIndices, writeOp.permutation_map(), writeOp.masked() ? *writeOp.masked() : ArrayAttr()); }; @@ -760,7 +760,7 @@ struct SplitTransferWriteOp : public OpRewritePattern { Location loc = xferWriteOp.getLoc(); auto memrefElementType = - xferWriteOp.memref().getType().cast().getElementType(); + xferWriteOp.source().getType().cast().getElementType(); SmallVector indices(xferWriteOp.indices().begin(), xferWriteOp.indices().end()); auto createSlice = [&](unsigned index, ArrayRef sliceIndices) { @@ -768,7 +768,7 @@ struct SplitTransferWriteOp : public OpRewritePattern { // `masked` attribute propagates conservatively: if the coarse op didn't // need masking, the fine op doesn't either. 
rewriter.create( - loc, tupleOp.getOperand(index), xferWriteOp.memref(), sliceIndices, + loc, tupleOp.getOperand(index), xferWriteOp.source(), sliceIndices, xferWriteOp.permutation_map(), xferWriteOp.masked() ? *xferWriteOp.masked() : ArrayAttr()); }; @@ -2142,7 +2142,7 @@ static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) { // Fold or create the check that `index + vector_size` <= `memref_size`. Value sum = xferOp.indices()[indicesIdx] + std_constant_index(vectorSize); Value cond = - createScopedFoldedSLE(sum, std_dim(xferOp.memref(), indicesIdx)); + createScopedFoldedSLE(sum, std_dim(xferOp.source(), indicesIdx)); if (!cond) return; // Conjunction over all dims for which we are in-bounds. @@ -2207,23 +2207,23 @@ static MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { } /// Operates under a scoped context to build the intersection between the -/// view `xferOp.memref()` @ `xferOp.indices()` and the view `alloc`. +/// view `xferOp.source()` @ `xferOp.indices()` and the view `alloc`. // TODO: view intersection/union/differences should be a proper std op. static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp, Value alloc) { using namespace edsc::intrinsics; - int64_t memrefRank = xferOp.getMemRefType().getRank(); + int64_t memrefRank = xferOp.getShapedType().getRank(); // TODO: relax this precondition, will require rank-reducing subviews. assert(memrefRank == alloc.getType().cast().getRank() && "Expected memref rank to match the alloc rank"); Value one = std_constant_index(1); ValueRange leadingIndices = - xferOp.indices().take_front(xferOp.getLeadingMemRefRank()); + xferOp.indices().take_front(xferOp.getLeadingShapedRank()); SmallVector sizes; sizes.append(leadingIndices.begin(), leadingIndices.end()); xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) { using MapList = ArrayRef>; - Value dimMemRef = std_dim(xferOp.memref(), indicesIdx); + Value dimMemRef = std_dim(xferOp.source(), indicesIdx); Value dimAlloc = std_dim(alloc, resultIdx); Value index = xferOp.indices()[indicesIdx]; AffineExpr i, j, k; @@ -2235,7 +2235,7 @@ static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp, ValueRange{dimMemRef, index, dimAlloc}); sizes.push_back(affineMin); }); - return std_sub_view(xferOp.memref(), xferOp.indices(), sizes, + return std_sub_view(xferOp.source(), xferOp.indices(), sizes, SmallVector(memrefRank, one)); } @@ -2263,12 +2263,12 @@ static scf::IfOp createScopedFullPartialLinalgCopy( using namespace edsc::intrinsics; scf::IfOp fullPartialIfOp; Value zero = std_constant_index(0); - Value memref = xferOp.memref(); + Value memref = xferOp.source(); conditionBuilder( returnTypes, inBoundsCond, [&]() -> scf::ValueVector { Value res = memref; - if (compatibleMemRefType != xferOp.getMemRefType()) + if (compatibleMemRefType != xferOp.getShapedType()) res = std_memref_cast(memref, compatibleMemRefType); scf::ValueVector viewAndIndices{res}; viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), @@ -2317,12 +2317,12 @@ static scf::IfOp createScopedFullPartialVectorTransferRead( using namespace edsc::intrinsics; scf::IfOp fullPartialIfOp; Value zero = std_constant_index(0); - Value memref = xferOp.memref(); + Value memref = xferOp.source(); conditionBuilder( returnTypes, inBoundsCond, [&]() -> scf::ValueVector { Value res = memref; - if (compatibleMemRefType != xferOp.getMemRefType()) + if (compatibleMemRefType != xferOp.getShapedType()) res = std_memref_cast(memref, 
compatibleMemRefType); scf::ValueVector viewAndIndices{res}; viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), @@ -2376,7 +2376,7 @@ static scf::IfOp createScopedFullPartialVectorTransferRead( /// /// Preconditions: /// 1. `xferOp.permutation_map()` must be a minor identity map -/// 2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()` +/// 2. the rank of the `xferOp.source()` and the rank of the `xferOp.vector()` /// must be equal. This will be relaxed in the future but requires /// rank-reducing subviews. LogicalResult mlir::vector::splitFullAndPartialTransfer( @@ -2404,8 +2404,8 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer( return failure(); OpBuilder::InsertionGuard guard(b); - if (xferOp.memref().getDefiningOp()) - b.setInsertionPointAfter(xferOp.memref().getDefiningOp()); + if (Operation *sourceOp = xferOp.source().getDefiningOp()) + b.setInsertionPointAfter(sourceOp); else b.setInsertionPoint(xferOp); ScopedContext scope(b, xferOp.getLoc()); @@ -2426,8 +2426,9 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer( b.getI64IntegerAttr(32)); } - MemRefType compatibleMemRefType = getCastCompatibleMemRefType( - xferOp.getMemRefType(), alloc.getType().cast()); + MemRefType compatibleMemRefType = + getCastCompatibleMemRefType(xferOp.getShapedType().cast(), + alloc.getType().cast()); // Read case: full fill + partial copy -> unmasked vector.xfer_read. SmallVector returnTypes(1 + xferOp.getTransferRank(), @@ -2543,7 +2544,7 @@ struct TransferReadExtractPattern extract.ids()[idCount++] * std_constant_index(extract.getResultType().getDimSize(pos)); } - Value newRead = vector_transfer_read(extract.getType(), read.memref(), + Value newRead = vector_transfer_read(extract.getType(), read.source(), indices, read.permutation_map(), read.padding(), read.maskedAttr()); Value dest = rewriter.create( @@ -2579,7 +2580,7 @@ struct TransferWriteInsertPattern insert.ids()[idCount++] * std_constant_index(insert.getSourceVectorType().getDimSize(pos)); } - vector_transfer_write(insert.vector(), write.memref(), indices, + vector_transfer_write(insert.vector(), write.source(), indices, write.permutation_map(), write.maskedAttr()); rewriter.eraseOp(write); return success(); diff --git a/mlir/lib/Dialect/Vector/VectorUtils.cpp b/mlir/lib/Dialect/Vector/VectorUtils.cpp index 3ab1f500f5d19..fc08d21b27a5c 100644 --- a/mlir/lib/Dialect/Vector/VectorUtils.cpp +++ b/mlir/lib/Dialect/Vector/VectorUtils.cpp @@ -243,16 +243,16 @@ AffineMap mlir::makePermutationMap( return ::makePermutationMap(indices, enclosingLoopToVectorDim); } -AffineMap mlir::getTransferMinorIdentityMap(MemRefType memRefType, +AffineMap mlir::getTransferMinorIdentityMap(ShapedType shapedType, VectorType vectorType) { int64_t elementVectorRank = 0; VectorType elementVectorType = - memRefType.getElementType().dyn_cast(); + shapedType.getElementType().dyn_cast(); if (elementVectorType) elementVectorRank += elementVectorType.getRank(); return AffineMap::getMinorIdentityMap( - memRefType.getRank(), vectorType.getRank() - elementVectorRank, - memRefType.getContext()); + shapedType.getRank(), vectorType.getRank() - elementVectorRank, + shapedType.getContext()); } bool matcher::operatesOnSuperVectorsOf(Operation &op, @@ -314,12 +314,12 @@ bool matcher::operatesOnSuperVectorsOf(Operation &op, bool mlir::isDisjointTransferSet(VectorTransferOpInterface transferA, VectorTransferOpInterface transferB) { - if (transferA.memref() != transferB.memref()) + if (transferA.source() != transferB.source()) return 
false; // For simplicity only look at transfer of same type. if (transferA.getVectorType() != transferB.getVectorType()) return false; - unsigned rankOffset = transferA.getLeadingMemRefRank(); + unsigned rankOffset = transferA.getLeadingShapedRank(); for (unsigned i = 0, e = transferA.indices().size(); i < e; i++) { auto indexA = transferA.indices()[i].getDefiningOp(); auto indexB = transferB.indices()[i].getDefiningOp(); diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 73b1f9e1e06ef..62eaa4e3a14e1 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -269,7 +269,7 @@ func @test_vector.transfer_read(%arg0: vector<4x3xf32>) { %c3 = constant 3 : index %f0 = constant 0.0 : f32 %vf0 = splat %f0 : vector<4x3xf32> - // expected-error@+1 {{ requires memref type}} + // expected-error@+1 {{ requires memref or ranked tensor type}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : vector<4x3xf32>, vector<1x1x2x3xf32> } @@ -297,7 +297,7 @@ func @test_vector.transfer_read(%arg0: memref) { func @test_vector.transfer_read(%arg0: memref) { %c3 = constant 3 : index %cst = constant 3.0 : f32 - // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the memref type}} + // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the source type}} %0 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0)->(d0)>} : memref, vector<128xf32> } @@ -343,7 +343,7 @@ func @test_vector.transfer_read(%arg0: memref>) { %c3 = constant 3 : index %f0 = constant 0.0 : f32 %vf0 = splat %f0 : vector<4x3xf32> - // expected-error@+1 {{requires memref vector element and vector result ranks to match}} + // expected-error@+1 {{requires source vector element and vector result ranks to match}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref>, vector<3xf32> } @@ -353,7 +353,7 @@ func @test_vector.transfer_read(%arg0: memref>) { %c3 = constant 3 : index %f0 = constant 0.0 : f32 %vf0 = splat %f0 : vector<6xf32> - // expected-error@+1 {{requires the bitwidth of the minor 1-D vector to be an integral multiple of the bitwidth of the minor 1-D vector of the memref}} + // expected-error@+1 {{requires the bitwidth of the minor 1-D vector to be an integral multiple of the bitwidth of the minor 1-D vector of the source}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : memref>, vector<3xf32> } @@ -392,7 +392,7 @@ func @test_vector.transfer_write(%arg0: vector<4x3xf32>) { %c3 = constant 3 : index %f0 = constant 0.0 : f32 %vf0 = splat %f0 : vector<4x3xf32> - // expected-error@+1 {{ requires memref type}} + // expected-error@+1 {{ requires memref or ranked tensor type}} vector.transfer_write %arg0, %arg0[%c3, %c3] : vector<4x3xf32>, f32 } @@ -419,7 +419,7 @@ func @test_vector.transfer_write(%arg0: memref) { func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index %cst = constant dense<3.0> : vector<128 x f32> - // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the memref type}} + // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the source type}} vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = affine_map<(d0)->(d0)>} : vector<128xf32>, memref } diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index aab6cabf759d8..07e9d8de3f493 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ 
b/mlir/test/Dialect/Vector/ops.mlir @@ -43,6 +43,54 @@ func @vector_transfer_ops(%arg0: memref, return } + +// CHECK-LABEL: func @vector_transfer_ops_tensor( +func @vector_transfer_ops_tensor(%arg0: tensor, + %arg1 : tensor>, + %arg2 : tensor>) -> + (tensor, tensor, tensor>, + tensor>, tensor>){ + // CHECK: %[[C3:.*]] = constant 3 : index + %c3 = constant 3 : index + %cst = constant 3.0 : f32 + %f0 = constant 0.0 : f32 + %c0 = constant 0 : i32 + %vf0 = splat %f0 : vector<4x3xf32> + %v0 = splat %c0 : vector<4x3xi32> + + // + // CHECK: vector.transfer_read + %0 = vector.transfer_read %arg0[%c3, %c3], %f0 {permutation_map = affine_map<(d0, d1)->(d0)>} : tensor, vector<128xf32> + // CHECK: vector.transfer_read + %1 = vector.transfer_read %arg0[%c3, %c3], %f0 {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : tensor, vector<3x7xf32> + // CHECK: vector.transfer_read + %2 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d0)>} : tensor, vector<128xf32> + // CHECK: vector.transfer_read + %3 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d1)>} : tensor, vector<128xf32> + // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor>, vector<1x1x4x3xf32> + %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : tensor>, vector<1x1x4x3xf32> + // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {masked = [true, false]} : tensor>, vector<1x1x4x3xf32> + %5 = vector.transfer_read %arg1[%c3, %c3], %vf0 {masked = [true, false]} : tensor>, vector<1x1x4x3xf32> + // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor>, vector<5x24xi8> + %6 = vector.transfer_read %arg2[%c3, %c3], %v0 : tensor>, vector<5x24xi8> + + + // CHECK: vector.transfer_write + %7 = vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0)>} : vector<128xf32>, tensor + // CHECK: vector.transfer_write + %8 = vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, tensor + // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor> + %9 = vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, tensor> + // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor> + %10 = vector.transfer_write %5, %arg1[%c3, %c3] {masked = [true, true]} : vector<1x1x4x3xf32>, tensor> + // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x24xi8>, tensor> + %11 = vector.transfer_write %6, %arg2[%c3, %c3] : vector<5x24xi8>, tensor> + + return %7, %8, %9, %10, %11 : + tensor, tensor, tensor>, + tensor>, tensor> +} + // CHECK-LABEL: @vector_broadcast func @vector_broadcast(%a: f32, %b: vector<16xf32>, %c: vector<1x16xf32>, %d: vector<8x1xf32>) -> vector<8x16xf32> { // CHECK: vector.broadcast %{{.*}} : f32 to vector<16xf32> From a323682dcbfdce5860fa33335f0fd87adf04360a Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 21 Dec 2020 19:52:27 +0300 Subject: [PATCH 023/378] [AMDGPU][MC][NFC] Lit tests cleanup See bug 48513 Reviewers: rampitec Differential Revision: https://reviews.llvm.org/D93550 --- llvm/test/MC/AMDGPU/flat-gfx9.s | 1 - llvm/test/MC/AMDGPU/flat-global.s | 1 - llvm/test/MC/AMDGPU/flat.s | 6 ------ llvm/test/MC/AMDGPU/fma-mix.s | 4 ---- llvm/test/MC/AMDGPU/literal16.s | 1 + llvm/test/MC/AMDGPU/mad-mix.s | 4 ---- 
llvm/test/MC/AMDGPU/smem.s | 1 - llvm/test/MC/AMDGPU/vop1-gfx9-err.s | 1 - llvm/test/MC/AMDGPU/vop1.s | 2 +- llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt | 2 +- llvm/test/MC/Disassembler/AMDGPU/vop3_vi.txt | 2 +- 11 files changed, 4 insertions(+), 21 deletions(-) diff --git a/llvm/test/MC/AMDGPU/flat-gfx9.s b/llvm/test/MC/AMDGPU/flat-gfx9.s index 858907ce436ea..da1ec062decee 100644 --- a/llvm/test/MC/AMDGPU/flat-gfx9.s +++ b/llvm/test/MC/AMDGPU/flat-gfx9.s @@ -12,7 +12,6 @@ flat_load_dword v1, v[3:4] offset:-1 // VI-ERR: :28: error: flat offset modifier is not supported on this GPU // GFX9-ERR: :28: error: expected a 12-bit unsigned offset -// FIXME: Error on VI in wrong column flat_load_dword v1, v[3:4] offset:4095 // GFX9: flat_load_dword v1, v[3:4] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x03,0x00,0x00,0x01] // VI-ERR: :28: error: flat offset modifier is not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s index 91c10ae137234..77092e0b34937 100644 --- a/llvm/test/MC/AMDGPU/flat-global.s +++ b/llvm/test/MC/AMDGPU/flat-global.s @@ -85,7 +85,6 @@ global_load_dwordx4 v[1:4], v[3:4], off dlc // GFX9-ERR: error: failed parsing operand // VI-ERR: error: instruction not supported on this GPU -// FIXME: VI error should be instruction nto supported global_load_dword v1, v[3:4], off offset:0 // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01] diff --git a/llvm/test/MC/AMDGPU/flat.s b/llvm/test/MC/AMDGPU/flat.s index 31dd4f0500f1e..f307ae30a759a 100644 --- a/llvm/test/MC/AMDGPU/flat.s +++ b/llvm/test/MC/AMDGPU/flat.s @@ -1,12 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CIVI --check-prefix=CI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=CIVI --check-prefix=VI -// FIXME: For missing instruction the error message is: -// error: too few operands for instruction -// It should be: -// error: instruction not supported on this GPU -// - // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: // RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/fma-mix.s b/llvm/test/MC/AMDGPU/fma-mix.s index 6bd293e467f94..f062664bf8c1b 100644 --- a/llvm/test/MC/AMDGPU/fma-mix.s +++ b/llvm/test/MC/AMDGPU/fma-mix.s @@ -22,8 +22,6 @@ v_fma_mix_f32 v0, abs(v1), v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-MADMIX-ERR: error: instruction not supported on this GPU -// FIXME: Improve error messages - v_fma_mix_f32 v0, v1, abs(v2), v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-MADMIX-ERR: error: instruction not supported on this GPU @@ -80,8 +78,6 @@ v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-MADMIX-ERR: error: instruction not supported on this GPU -// FIXME: Improve error messages - v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-MADMIX-ERR: error: 
instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/literal16.s b/llvm/test/MC/AMDGPU/literal16.s index 97d16c3742851..2a641d53a9b66 100644 --- a/llvm/test/MC/AMDGPU/literal16.s +++ b/llvm/test/MC/AMDGPU/literal16.s @@ -146,3 +146,4 @@ v_madmk_f16 v1, v2, 64.0, v3 v_add_f16_e32 v1, 64.0, v2 +// VI: v_add_f16_e32 v1, 0x5400, v2 ; encoding: [0xff,0x04,0x02,0x3e,0x00,0x54,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/mad-mix.s b/llvm/test/MC/AMDGPU/mad-mix.s index f1de62b5a5482..4b28d03bb8284 100644 --- a/llvm/test/MC/AMDGPU/mad-mix.s +++ b/llvm/test/MC/AMDGPU/mad-mix.s @@ -22,8 +22,6 @@ v_mad_mix_f32 v0, abs(v1), v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-FMAMIX-ERR: error: instruction not supported on this GPU -// FIXME: Improve diagnistics - v_mad_mix_f32 v0, v1, abs(v2), v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-FMAMIX-ERR: error: instruction not supported on this GPU @@ -80,8 +78,6 @@ v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-FMAMIX-ERR: error: instruction not supported on this GPU -// FIXME: Improve diagnistics - v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] // GFX9-FMAMIX-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index ff725dbba4132..46bf2baa1c61f 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -63,7 +63,6 @@ s_memrealtime ttmp[0:1] // GFX10: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1b,0x94,0xf4,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// FIXME: Should error about instruction on GPU s_store_dword s1, s[2:3], 0xfc // GFX89: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x42,0xc0,0xfc,0x00,0x00,0x00] // GFX1012: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x40,0xf4,0xfc,0x00,0x00,0xfa] diff --git a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s index 42feac2f0aa21..0afa306182a93 100644 --- a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s +++ b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s @@ -12,7 +12,6 @@ v_swap_b32 v1, s0 // GFX9: error: invalid operand for instruction // VI: error: instruction not supported on this GPU -// FIXME: Better error for it requiring VOP1 encoding v_swap_b32_e64 v1, v2 // GFX9: :1: error: e64 variant of this instruction is not supported // CI: :1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s index 12a033c92992c..df0b384e3efd4 100644 --- a/llvm/test/MC/AMDGPU/vop1.s +++ b/llvm/test/MC/AMDGPU/vop1.s @@ -1,6 +1,6 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CI --check-prefix=SICI --check-prefix=CIVI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s 
--check-prefix=GCN --check-prefix=CIVI --check-prefix=VI // RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt index fbbd8ae864122..fe9617a1ae16a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -693,7 +693,7 @@ # GFX9: v_sat_pk_u8_i16_e64 v255, v1 ; encoding: [0xff,0x00,0x8f,0xd1,0x01,0x01,0x00,0x00] 0xff,0x00,0x8f,0xd1,0x01,0x01,0x00,0x00 -# GXF9: v_screen_partition_4se_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] +# GFX9: v_screen_partition_4se_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] 0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00 # GFX9: v_add_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x34,0xd1,0x0d,0x3f,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop3_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/vop3_vi.txt index cd9a65cc13e83..5ee8b00fd044a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vop3_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop3_vi.txt @@ -225,7 +225,7 @@ # VI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x04] 0x18 0x6a 0xe0 0xd1 0x16 0x2d 0x52 0x04 -# FIXME: v_div_scale_f32 v24, vcc, s[10:11], v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x0a,0x2c,0x52,0x04] +# VI: v_div_scale_f32 v24, vcc, s10, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x0a,0x2c,0x52,0x04] 0x18 0x6a 0xe0 0xd1 0x0a 0x2c 0x52 0x04 # VI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04] From 8ab5770a17fee4c39e23fc52a30057eb689fa578 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 21 Dec 2020 20:21:07 +0300 Subject: [PATCH 024/378] [AMDGPU][MC][NFC] Parser refactoring See bug 48515 (https://bugs.llvm.org/show_bug.cgi?id=48515) Reviewers: rampitec Differential Revision: https://reviews.llvm.org/D93548 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 170 +++++------------- 1 file changed, 47 insertions(+), 123 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 6597b627f0efe..f472e4d7eace9 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1434,7 +1434,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool trySkipToken(const AsmToken::TokenKind Kind); bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); - bool parseId(StringRef &Val, const StringRef ErrMsg); + bool parseId(StringRef &Val, const StringRef ErrMsg = ""); void peekTokens(MutableArrayRef Tokens); AsmToken::TokenKind getTokenKind() const; @@ -4073,9 +4073,8 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, if (ParseAsAbsoluteExpression(Major)) return TokError("invalid major version"); - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) return TokError("minor version number required, comma expected"); - Lex(); if (ParseAsAbsoluteExpression(Minor)) return TokError("invalid minor version"); @@ -4178,15 +4177,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { Optional EnableWavefrontSize32; while (true) { - while (getLexer().is(AsmToken::EndOfStatement)) - Lex(); - - if (getLexer().isNot(AsmToken::Identifier)) - return 
TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); + while (trySkipToken(AsmToken::EndOfStatement)); - StringRef ID = getTok().getIdentifier(); + StringRef ID; SMRange IDRange = getTok().getLocRange(); - Lex(); + if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) + return true; if (ID == ".end_amdhsa_kernel") break; @@ -4469,32 +4465,23 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { if (ParseDirectiveMajorMinor(Major, Minor)) return true; - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) return TokError("stepping version number required, comma expected"); - Lex(); if (ParseAsAbsoluteExpression(Stepping)) return TokError("invalid stepping version"); - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) return TokError("vendor name required, comma expected"); - Lex(); - - if (getLexer().isNot(AsmToken::String)) - return TokError("invalid vendor name"); - VendorName = getLexer().getTok().getStringContents(); - Lex(); + if (!parseString(VendorName, "invalid vendor name")) + return true; - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) return TokError("arch name required, comma expected"); - Lex(); - - if (getLexer().isNot(AsmToken::String)) - return TokError("invalid arch name"); - ArchName = getLexer().getTok().getStringContents(); - Lex(); + if (!parseString(ArchName, "invalid arch name")) + return true; getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, VendorName, ArchName); @@ -4569,14 +4556,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { while (true) { // Lex EndOfStatement. This is in a while loop, because lexing a comment // will set the current token to EndOfStatement. - while(getLexer().is(AsmToken::EndOfStatement)) - Lex(); - - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected value identifier or .end_amd_kernel_code_t"); + while(trySkipToken(AsmToken::EndOfStatement)); - StringRef ID = getLexer().getTok().getIdentifier(); - Lex(); + StringRef ID; + if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) + return true; if (ID == ".end_amd_kernel_code_t") break; @@ -4678,13 +4662,9 @@ bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, Lex(); } - if (getLexer().is(AsmToken::Identifier)) { - StringRef ID = getLexer().getTok().getIdentifier(); - if (ID == AssemblerDirectiveEnd) { - Lex(); - FoundEnd = true; - break; - } + if (trySkipId(AssemblerDirectiveEnd)) { + FoundEnd = true; + break; } CollectStream << Parser.parseStringToEndOfStatement() @@ -4733,19 +4713,17 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() { return TokError(Twine("invalid value in ") + Twine(PALMD::AssemblerDirective)); } - if (getLexer().isNot(AsmToken::Comma)) { + if (!trySkipToken(AsmToken::Comma)) { return TokError(Twine("expected an even number of values in ") + Twine(PALMD::AssemblerDirective)); } - Lex(); if (ParseAsAbsoluteExpression(Value)) { return TokError(Twine("invalid value in ") + Twine(PALMD::AssemblerDirective)); } PALMetadata->setRegister(Key, Value); - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) break; - Lex(); } return false; } @@ -4777,8 +4755,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { return Error(SizeLoc, "size is too large"); int64_t Alignment = 4; - if (getLexer().is(AsmToken::Comma)) { - Lex(); + if (trySkipToken(AsmToken::Comma)) { SMLoc AlignLoc = getLexer().getLoc(); if 
(getParser().parseAbsoluteExpression(Alignment)) return true; @@ -4933,32 +4910,30 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, getLexer().is(AsmToken::EndOfStatement)) return ResTy; - if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { + SMLoc RBraceLoc; + SMLoc LBraceLoc = getLoc(); + if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { unsigned Prefix = Operands.size(); - SMLoc LBraceLoc = getTok().getLoc(); - Parser.Lex(); // eat the '[' for (;;) { ResTy = parseReg(Operands); if (ResTy != MatchOperand_Success) return ResTy; - if (getLexer().is(AsmToken::RBrac)) + RBraceLoc = getLoc(); + if (trySkipToken(AsmToken::RBrac)) break; - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) return MatchOperand_ParseFail; - Parser.Lex(); } if (Operands.size() - Prefix > 1) { Operands.insert(Operands.begin() + Prefix, AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); - Operands.push_back(AMDGPUOperand::CreateToken(this, "]", - getTok().getLoc())); + Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); } - Parser.Lex(); // eat the ']' return MatchOperand_Success; } @@ -4996,15 +4971,14 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, bool IsMIMG = Name.startswith("image_"); - while (!getLexer().is(AsmToken::EndOfStatement)) { + while (!trySkipToken(AsmToken::EndOfStatement)) { OperandMode Mode = OperandMode_Default; if (IsMIMG && isGFX10Plus() && Operands.size() == 2) Mode = OperandMode_NSA; OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); // Eat the comma or space if there is one. - if (getLexer().is(AsmToken::Comma)) - Parser.Lex(); + trySkipToken(AsmToken::Comma); if (Res != MatchOperand_Success) { checkUnsupportedInstruction(Name, NameLoc); @@ -5015,14 +4989,12 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, "not a valid operand."; Error(getLexer().getLoc(), Msg); } - while (!getLexer().is(AsmToken::EndOfStatement)) { + while (!trySkipToken(AsmToken::EndOfStatement)) { Parser.Lex(); } - Parser.Lex(); return true; } } - Parser.Lex(); return false; } @@ -5163,26 +5135,13 @@ static void addOptionalImmOperand( OperandMatchResultTy AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { - if (getLexer().isNot(AsmToken::Identifier)) { + if (!trySkipId(Prefix)) return MatchOperand_NoMatch; - } - StringRef Tok = Parser.getTok().getString(); - if (Tok != Prefix) { - return MatchOperand_NoMatch; - } - Parser.Lex(); - if (getLexer().isNot(AsmToken::Colon)) { + if (!trySkipToken(AsmToken::Colon)) return MatchOperand_ParseFail; - } - Parser.Lex(); - if (getLexer().isNot(AsmToken::Identifier)) { - return MatchOperand_ParseFail; - } - - Value = Parser.getTok().getString(); - return MatchOperand_Success; + return parseId(Value) ? 
MatchOperand_Success : MatchOperand_ParseFail; } //===----------------------------------------------------------------------===// @@ -6137,7 +6096,8 @@ AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { lex(); return true; } else { - Error(getLoc(), ErrMsg); + if (!ErrMsg.empty()) + Error(getLoc(), ErrMsg); return false; } } @@ -6541,17 +6501,10 @@ AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { int64_t Imm = 0; SMLoc S = Parser.getTok().getLoc(); - if (getLexer().getKind() == AsmToken::Identifier && - Parser.getTok().getString() == "gpr_idx" && - getLexer().peekTok().is(AsmToken::LParen)) { - - Parser.Lex(); - Parser.Lex(); - + if (trySkipId("gpr_idx", AsmToken::LParen)) { Imm = parseGPRIdxMacro(); if (Imm == UNDEF) return MatchOperand_ParseFail; - } else { if (getParser().parseAbsoluteExpression(Imm)) return MatchOperand_ParseFail; @@ -7298,17 +7251,9 @@ OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); - if (getLexer().isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - if (getLexer().getTok().getString() != "dim") + if (!trySkipId("dim", AsmToken::Colon)) return MatchOperand_NoMatch; - Parser.Lex(); - if (getLexer().isNot(AsmToken::Colon)) - return MatchOperand_ParseFail; - - Parser.Lex(); - // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an // integer. std::string Token; @@ -7342,49 +7287,33 @@ OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); StringRef Prefix; - if (getLexer().getKind() == AsmToken::Identifier) { - Prefix = Parser.getTok().getString(); - } else { - return MatchOperand_NoMatch; - } - - if (Prefix != "dpp8") - return parseDPPCtrl(Operands); - if (!isGFX10Plus()) + if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) return MatchOperand_NoMatch; // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] int64_t Sels[8]; - Parser.Lex(); - if (getLexer().isNot(AsmToken::Colon)) - return MatchOperand_ParseFail; - - Parser.Lex(); - if (getLexer().isNot(AsmToken::LBrac)) + if (!trySkipToken(AsmToken::LBrac)) return MatchOperand_ParseFail; - Parser.Lex(); if (getParser().parseAbsoluteExpression(Sels[0])) return MatchOperand_ParseFail; if (0 > Sels[0] || 7 < Sels[0]) return MatchOperand_ParseFail; for (size_t i = 1; i < 8; ++i) { - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) return MatchOperand_ParseFail; - Parser.Lex(); if (getParser().parseAbsoluteExpression(Sels[i])) return MatchOperand_ParseFail; if (0 > Sels[i] || 7 < Sels[i]) return MatchOperand_ParseFail; } - if (getLexer().isNot(AsmToken::RBrac)) + if (!trySkipToken(AsmToken::RBrac)) return MatchOperand_ParseFail; - Parser.Lex(); unsigned DPP8 = 0; for (size_t i = 0; i < 8; ++i) @@ -7446,17 +7375,15 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { if (Prefix == "quad_perm") { // quad_perm:[%d,%d,%d,%d] Parser.Lex(); - if (getLexer().isNot(AsmToken::LBrac)) + if (!trySkipToken(AsmToken::LBrac)) return MatchOperand_ParseFail; - Parser.Lex(); if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) return MatchOperand_ParseFail; for (int i = 0; i < 3; ++i) { - if (getLexer().isNot(AsmToken::Comma)) + if (!trySkipToken(AsmToken::Comma)) return MatchOperand_ParseFail; - Parser.Lex(); int64_t Temp; if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) @@ -7465,9 +7392,8 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { Int += (Temp << shift); } - if 
(getLexer().isNot(AsmToken::RBrac)) + if (!trySkipToken(AsmToken::RBrac)) return MatchOperand_ParseFail; - Parser.Lex(); } else { // sel:%d Parser.Lex(); @@ -7623,7 +7549,6 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, .Case("WORD_1", SdwaSel::WORD_1) .Case("DWORD", SdwaSel::DWORD) .Default(0xffffffff); - Parser.Lex(); // eat last token if (Int == 0xffffffff) { return MatchOperand_ParseFail; @@ -7652,7 +7577,6 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) .Default(0xffffffff); - Parser.Lex(); // eat last token if (Int == 0xffffffff) { return MatchOperand_ParseFail; From f4f49d9d0d699f3ac32c1037516c9ab17551991e Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 21 Dec 2020 20:42:35 +0300 Subject: [PATCH 025/378] [AMDGPU][MC][NFC] Fix for sanitizer error in 8ab5770 Corrected to fix sanitizer error introduced by 8ab5770 --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index f472e4d7eace9..f6b204f2415fc 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -7285,7 +7285,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); - StringRef Prefix; if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) return MatchOperand_NoMatch; From bb8d20d9f3bb955ae6f6143d24749faf61d573a9 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 21 Dec 2020 13:02:47 -0500 Subject: [PATCH 026/378] [cuda][hip] Fix typoes in header wrappers. --- clang/lib/Headers/cuda_wrappers/algorithm | 2 +- clang/lib/Headers/cuda_wrappers/new | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/cuda_wrappers/algorithm b/clang/lib/Headers/cuda_wrappers/algorithm index 01af18360d8d4..f14a0b00bb046 100644 --- a/clang/lib/Headers/cuda_wrappers/algorithm +++ b/clang/lib/Headers/cuda_wrappers/algorithm @@ -1,4 +1,4 @@ -/*===---- complex - CUDA wrapper for ----------------------------=== +/*===---- algorithm - CUDA wrapper for -------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/clang/lib/Headers/cuda_wrappers/new b/clang/lib/Headers/cuda_wrappers/new index 7f255314056ac..d5fb3b7011de9 100644 --- a/clang/lib/Headers/cuda_wrappers/new +++ b/clang/lib/Headers/cuda_wrappers/new @@ -1,4 +1,4 @@ -/*===---- complex - CUDA wrapper for ------------------------------=== +/*===---- new - CUDA wrapper for -------------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal From dfa40840e0e2fa094c5d3f441affe0785cdc8d09 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 17 Dec 2020 21:02:01 -0800 Subject: [PATCH 027/378] scudo: Remove ANDROID_EXPERIMENTAL_MTE macro. Kernel support for MTE has been released in Linux 5.10. This means that it is a stable API and we no longer need to make the support conditional on a macro. 
We do need to provide conditional definitions of the new macros though in order to avoid a dependency on new kernel headers. Differential Revision: https://reviews.llvm.org/D93513 --- compiler-rt/lib/scudo/standalone/linux.cpp | 9 +++---- compiler-rt/lib/scudo/standalone/memtag.h | 29 +++++++++++++--------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp index 12f3da620e123..d2464677b2792 100644 --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -35,10 +35,6 @@ #define ANDROID_PR_SET_VMA_ANON_NAME 0 #endif -#ifdef ANDROID_EXPERIMENTAL_MTE -#include -#endif - namespace scudo { uptr getPageSize() { return static_cast(sysconf(_SC_PAGESIZE)); } @@ -54,7 +50,10 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, MmapProt = PROT_NONE; } else { MmapProt = PROT_READ | PROT_WRITE; -#if defined(__aarch64__) && defined(ANDROID_EXPERIMENTAL_MTE) +#if defined(__aarch64__) +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif if (Flags & MAP_MEMTAG) MmapProt |= PROT_MTE; #endif diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index 4b22c727849db..c3c4f574b4fc9 100644 --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -14,9 +14,6 @@ #if SCUDO_LINUX #include #include -#if defined(ANDROID_EXPERIMENTAL_MTE) -#include -#endif #endif namespace scudo { @@ -56,20 +53,28 @@ inline uint8_t extractTag(uptr Ptr) { #if defined(__aarch64__) inline bool systemSupportsMemoryTagging() { -#if defined(ANDROID_EXPERIMENTAL_MTE) - return getauxval(AT_HWCAP2) & HWCAP2_MTE; -#else - return false; +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) #endif + return getauxval(AT_HWCAP2) & HWCAP2_MTE; } inline bool systemDetectsMemoryTagFaultsTestOnly() { -#if defined(ANDROID_EXPERIMENTAL_MTE) - return (prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0) & PR_MTE_TCF_MASK) != - PR_MTE_TCF_NONE; -#else - return false; +#ifndef PR_GET_TAGGED_ADDR_CTRL +#define PR_GET_TAGGED_ADDR_CTRL 56 +#endif +#ifndef PR_MTE_TCF_SHIFT +#define PR_MTE_TCF_SHIFT 1 +#endif +#ifndef PR_MTE_TCF_NONE +#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_MASK +#define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) #endif + return (static_cast( + prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)) & + PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE; } inline void disableMemoryTagChecksTestOnly() { From 43def795aacd6794f93b91fc76e59953fd67e138 Mon Sep 17 00:00:00 2001 From: Hafiz Abid Qadeer Date: Mon, 21 Dec 2020 19:06:17 +0000 Subject: [PATCH 028/378] Update references to 'master' branch. This commit replace 'master' with 'main' in llvm/docs. Reviewed By: sammccall, kristof.beyls Differential Revision: https://reviews.llvm.org/D92831 --- llvm/docs/CodingStandards.rst | 2 +- llvm/docs/DeveloperPolicy.rst | 4 ++-- llvm/docs/FAQ.rst | 2 +- llvm/docs/GettingStarted.rst | 10 +++++----- llvm/docs/GitBisecting.rst | 6 +++--- llvm/docs/GlobalISel/IRTranslator.rst | 2 +- llvm/docs/LibFuzzer.rst | 4 ++-- llvm/docs/TestingGuide.rst | 2 +- llvm/docs/TypeMetadata.rst | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/llvm/docs/CodingStandards.rst b/llvm/docs/CodingStandards.rst index a5798bd73cb13..57d148df89f80 100644 --- a/llvm/docs/CodingStandards.rst +++ b/llvm/docs/CodingStandards.rst @@ -77,7 +77,7 @@ on the standard library facilities and the LLVM support libraries as much as possible. 
LLVM support libraries (for example, `ADT -`_) +`_) implement specialized data structures or functionality missing in the standard library. Such libraries are usually implemented in the ``llvm`` namespace and follow the expected standard interface, when there is one. diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index 3fa629965318c..7bcb5664540a3 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -80,7 +80,7 @@ Making and Submitting a Patch When making a patch for review, the goal is to make it as easy for the reviewer to read it as possible. As such, we recommend that you: -#. Make your patch against git master, not a branch, and not an old version +#. Make your patch against git main, not a branch, and not an old version of LLVM. This makes it easy to apply the patch. For information on how to clone from git, please see the :ref:`Getting Started Guide `. @@ -146,7 +146,7 @@ problem, we have a notion of an 'owner' for a piece of the code. The sole responsibility of a code owner is to ensure that a commit to their area of the code is appropriately reviewed, either by themself or by someone else. The list of current code owners can be found in the file `CODE_OWNERS.TXT -`_ in the +`_ in the root of the LLVM source tree. Note that code ownership is completely different than reviewers: anyone can diff --git a/llvm/docs/FAQ.rst b/llvm/docs/FAQ.rst index aef15d6dc711d..229ac99f703c1 100644 --- a/llvm/docs/FAQ.rst +++ b/llvm/docs/FAQ.rst @@ -13,7 +13,7 @@ Can I modify LLVM source code and redistribute the modified source? ------------------------------------------------------------------- Yes. The modified source distribution must retain the copyright notice and follow the conditions listed in the `Apache License v2.0 with LLVM Exceptions -`_. +`_. Can I modify the LLVM source code and redistribute binaries or other tools based on it, without redistributing the source? diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst index d4e4a3b039280..a2274f80fc1f0 100644 --- a/llvm/docs/GettingStarted.rst +++ b/llvm/docs/GettingStarted.rst @@ -457,7 +457,7 @@ either via emailing to llvm-commits, or, preferably, via :ref:`Phabricator You'll generally want to make sure your branch has a single commit, corresponding to the review you wish to send, up-to-date with the upstream -``origin/master`` branch, and doesn't contain merges. Once you have that, you +``origin/main`` branch, and doesn't contain merges. Once you have that, you can start `a Phabricator review `_ (or use ``git show`` or ``git format-patch`` to output the diff, and attach it to an email message). @@ -501,7 +501,7 @@ For developers to commit changes from Git ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Once a patch is reviewed, you should rebase it, re-test locally, and commit the -changes to LLVM's master branch. This is done using `git push` if you have the +changes to LLVM's main branch. This is done using `git push` if you have the required access rights. See `committing a change `_ for Phabricator based commits or `obtaining commit access `_ @@ -515,13 +515,13 @@ accepted commit on the branch named `branch-with-change`. # Go to the branch with your accepted commit. % git checkout branch-with-change # Rebase your change onto the latest commits on Github. - % git pull --rebase origin master + % git pull --rebase origin main # Rerun the appropriate tests if needed. % ninja check-$whatever # Check that the list of commits about to be pushed is correct. 
- % git log origin/master...HEAD --oneline + % git log origin/main...HEAD --oneline # Push to Github. - % git push origin HEAD:master + % git push origin HEAD:main LLVM currently has a linear-history policy, which means that merge commits are not allowed. The `llvm-project` repo on github is configured to reject pushes diff --git a/llvm/docs/GitBisecting.rst b/llvm/docs/GitBisecting.rst index 8f44e351a085f..81876c74caa7b 100644 --- a/llvm/docs/GitBisecting.rst +++ b/llvm/docs/GitBisecting.rst @@ -23,7 +23,7 @@ See https://git-scm.com/docs/git-bisect for a good overview. In summary: .. code-block:: bash git bisect start - git bisect bad master + git bisect bad main git bisect good f00ba git will check out a revision in between. Try to reproduce your problem at @@ -33,8 +33,8 @@ If you can't repro at the current commit (maybe the build is broken), run ``git bisect skip`` and git will pick a nearby alternate commit. (To abort a bisect, run ``git bisect reset``, and if git complains about not -being able to reset, do the usual ``git checkout -f master; git reset --hard -origin/master`` dance and try again). +being able to reset, do the usual ``git checkout -f main; git reset --hard +origin/main`` dance and try again). ``git bisect run`` ================== diff --git a/llvm/docs/GlobalISel/IRTranslator.rst b/llvm/docs/GlobalISel/IRTranslator.rst index 712fe95a82925..9e12fdcbcbe5b 100644 --- a/llvm/docs/GlobalISel/IRTranslator.rst +++ b/llvm/docs/GlobalISel/IRTranslator.rst @@ -91,5 +91,5 @@ This is beneficial as it allows us to fold constants into immediate operands during :ref:`instructionselect`, while still avoiding redundant materializations for expensive non-foldable constants. However, this can lead to unnecessary spills and reloads in an -O0 pipeline, as these virtual registers can have long -live ranges. This can be mitigated by running a `localizer `_ +live ranges. This can be mitigated by running a `localizer `_ after the translator. diff --git a/llvm/docs/LibFuzzer.rst b/llvm/docs/LibFuzzer.rst index 70a3f029c6f3e..b251895bc8e90 100644 --- a/llvm/docs/LibFuzzer.rst +++ b/llvm/docs/LibFuzzer.rst @@ -571,7 +571,7 @@ Periodically restart both fuzzers so that they can use each other's findings. Currently, there is no simple way to run both fuzzing engines in parallel while sharing the same corpus dir. You may also use AFL on your target function ``LLVMFuzzerTestOneInput``: -see an example `here `__. +see an example `here `__. How good is my fuzzer? ---------------------- @@ -815,7 +815,7 @@ Trophies .. _AddressSanitizer: https://clang.llvm.org/docs/AddressSanitizer.html .. _LeakSanitizer: https://clang.llvm.org/docs/LeakSanitizer.html .. _Heartbleed: http://en.wikipedia.org/wiki/Heartbleed -.. _FuzzerInterface.h: https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/fuzzer/FuzzerInterface.h +.. _FuzzerInterface.h: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/fuzzer/FuzzerInterface.h .. _3.7.0: https://llvm.org/releases/3.7.0/docs/LibFuzzer.html .. _building Clang from trunk: https://clang.llvm.org/get_started.html .. _MemorySanitizer: https://clang.llvm.org/docs/MemorySanitizer.html diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index 0f91292057569..2070e38427615 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -73,7 +73,7 @@ transforming it. 
They are tested in general using the same infrastructure as the regression tests, by creating a separate "Printer" pass to consume the analysis result and print it on the standard output in a textual format suitable for FileCheck. -See `llvm/test/Analysis/BranchProbabilityInfo/loop.ll `_ +See `llvm/test/Analysis/BranchProbabilityInfo/loop.ll `_ for an example of such test. ``test-suite`` diff --git a/llvm/docs/TypeMetadata.rst b/llvm/docs/TypeMetadata.rst index 74d439411497e..5fa864dc8ab21 100644 --- a/llvm/docs/TypeMetadata.rst +++ b/llvm/docs/TypeMetadata.rst @@ -223,7 +223,7 @@ efficiently to minimize the sizes of the underlying bitsets. ret void } -.. _GlobalLayoutBuilder: https://github.com/llvm/llvm-project/blob/master/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h +.. _GlobalLayoutBuilder: https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h ``!vcall_visibility`` Metadata ============================== From 82bd64fff63272c92b91a951ffde678fb9af4899 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 20 Dec 2020 21:30:33 +0100 Subject: [PATCH 029/378] [AA] byval argument is identified function local byval arguments should mostly get the same treatment as noalias arguments in alias analysis. This was not the case for the isIdentifiedFunctionLocal() function. Marking byval arguments as identified function local means that they cannot alias with other arguments, which I believe is correct. Differential Revision: https://reviews.llvm.org/D93602 --- llvm/include/llvm/Analysis/AliasAnalysis.h | 3 --- llvm/lib/Analysis/AliasAnalysis.cpp | 10 +++++----- llvm/test/Analysis/BasicAA/noalias-param.ll | 4 ++-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index b84febaeeeaa9..98a2a7fb075ae 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -1097,9 +1097,6 @@ template class AAResultBase { /// Return true if this pointer is returned by a noalias function. bool isNoAliasCall(const Value *V); -/// Return true if this is an argument with the noalias attribute. -bool isNoAliasArgument(const Value *V); - /// Return true if this pointer refers to a distinct and identifiable object. 
/// This returns true for: /// Global Variables and Functions (but not Global Aliases) diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index 7d4969cc24c4d..f5b62ef06a23a 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -943,9 +943,9 @@ bool llvm::isNoAliasCall(const Value *V) { return false; } -bool llvm::isNoAliasArgument(const Value *V) { +static bool isNoAliasOrByValArgument(const Value *V) { if (const Argument *A = dyn_cast(V)) - return A->hasNoAliasAttr(); + return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } @@ -956,13 +956,13 @@ bool llvm::isIdentifiedObject(const Value *V) { return true; if (isNoAliasCall(V)) return true; - if (const Argument *A = dyn_cast(V)) - return A->hasNoAliasAttr() || A->hasByValAttr(); + if (isNoAliasOrByValArgument(V)) + return true; return false; } bool llvm::isIdentifiedFunctionLocal(const Value *V) { - return isa(V) || isNoAliasCall(V) || isNoAliasArgument(V); + return isa(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V); } void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) { diff --git a/llvm/test/Analysis/BasicAA/noalias-param.ll b/llvm/test/Analysis/BasicAA/noalias-param.ll index aab55595da2a9..81c89feaabb41 100644 --- a/llvm/test/Analysis/BasicAA/noalias-param.ll +++ b/llvm/test/Analysis/BasicAA/noalias-param.ll @@ -22,9 +22,9 @@ entry: ret void } -; TODO: Result should be the same for byval instead of noalias. +; Result should be the same for byval instead of noalias. ; CHECK-LABEL: byval -; CHECK: MayAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %b define void @byval(i32* byval(i32) %a, i32* %b) nounwind { entry: store i32 1, i32* %a From 3fa2d37eb3f8acddcfde749ca822f2cc7d900cbb Mon Sep 17 00:00:00 2001 From: Quentin Chateau Date: Mon, 21 Dec 2020 20:16:47 +0100 Subject: [PATCH 030/378] [clangd][NFC] Improve clangd status messages clangd actions have various naming schemes, the most common being PascalCase. This commit applies PascalCase to all clangd actions, and fix the status rendering in `renderTUAction` to look more consistent. 
Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D93546 --- clang-tools-extra/clangd/ClangdServer.cpp | 8 ++++---- clang-tools-extra/clangd/TUScheduler.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 8b0d0591abe70..b760b31c0b87b 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -621,7 +621,7 @@ void ClangdServer::typeHierarchy(PathRef File, Position Pos, int Resolve, File)); }; - WorkScheduler.runWithAST("Type Hierarchy", File, std::move(Action)); + WorkScheduler.runWithAST("TypeHierarchy", File, std::move(Action)); } void ClangdServer::resolveTypeHierarchy( @@ -642,7 +642,7 @@ void ClangdServer::prepareCallHierarchy( return CB(InpAST.takeError()); CB(clangd::prepareCallHierarchy(InpAST->AST, Pos, File)); }; - WorkScheduler.runWithAST("Call Hierarchy", File, std::move(Action)); + WorkScheduler.runWithAST("CallHierarchy", File, std::move(Action)); } void ClangdServer::incomingCalls( @@ -678,7 +678,7 @@ void ClangdServer::documentSymbols(llvm::StringRef File, return CB(InpAST.takeError()); CB(clangd::getDocumentSymbols(InpAST->AST)); }; - WorkScheduler.runWithAST("documentSymbols", File, std::move(Action), + WorkScheduler.runWithAST("DocumentSymbols", File, std::move(Action), TUScheduler::InvalidateOnUpdate); } @@ -690,7 +690,7 @@ void ClangdServer::foldingRanges(llvm::StringRef File, return CB(InpAST.takeError()); CB(clangd::getFoldingRanges(InpAST->AST)); }; - WorkScheduler.runWithAST("foldingRanges", File, std::move(Action), + WorkScheduler.runWithAST("FoldingRanges", File, std::move(Action), TUScheduler::InvalidateOnUpdate); } diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 813a000b41a5f..7a858664faa5a 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -1220,7 +1220,7 @@ std::string renderTUAction(const PreambleAction PA, const ASTAction &AA) { } if (Result.empty()) return "idle"; - return llvm::join(Result, ","); + return llvm::join(Result, ", "); } } // namespace From 3f3ab03ab7bbaf13329b0ff07c5d3de40970bfcd Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 21 Dec 2020 20:39:05 +0100 Subject: [PATCH 031/378] [lldb] Remove anonymous namespace from NativeRegisterContextLinux_x86_64 Use "static" instead. --- .../Linux/NativeRegisterContextLinux_x86_64.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp index 6462441249c04..c6aa320c0c142 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp @@ -41,11 +41,8 @@ static inline int get_cpuid_count(unsigned int __leaf, using namespace lldb_private; using namespace lldb_private::process_linux; -// Private namespace. - -namespace { // x86 32-bit general purpose registers. 
-const uint32_t g_gpr_regnums_i386[] = { +static const uint32_t g_gpr_regnums_i386[] = { lldb_eax_i386, lldb_ebx_i386, lldb_ecx_i386, lldb_edx_i386, lldb_edi_i386, lldb_esi_i386, lldb_ebp_i386, lldb_esp_i386, lldb_eip_i386, lldb_eflags_i386, lldb_cs_i386, lldb_fs_i386, @@ -62,7 +59,7 @@ static_assert((sizeof(g_gpr_regnums_i386) / sizeof(g_gpr_regnums_i386[0])) - "g_gpr_regnums_i386 has wrong number of register infos"); // x86 32-bit floating point registers. -const uint32_t g_fpu_regnums_i386[] = { +static const uint32_t g_fpu_regnums_i386[] = { lldb_fctrl_i386, lldb_fstat_i386, lldb_ftag_i386, lldb_fop_i386, lldb_fiseg_i386, lldb_fioff_i386, lldb_foseg_i386, lldb_fooff_i386, lldb_mxcsr_i386, lldb_mxcsrmask_i386, lldb_st0_i386, lldb_st1_i386, @@ -80,7 +77,7 @@ static_assert((sizeof(g_fpu_regnums_i386) / sizeof(g_fpu_regnums_i386[0])) - "g_fpu_regnums_i386 has wrong number of register infos"); // x86 32-bit AVX registers. -const uint32_t g_avx_regnums_i386[] = { +static const uint32_t g_avx_regnums_i386[] = { lldb_ymm0_i386, lldb_ymm1_i386, lldb_ymm2_i386, lldb_ymm3_i386, lldb_ymm4_i386, lldb_ymm5_i386, lldb_ymm6_i386, lldb_ymm7_i386, LLDB_INVALID_REGNUM // register sets need to end with this flag @@ -196,7 +193,7 @@ static_assert((sizeof(g_mpx_regnums_x86_64) / sizeof(g_mpx_regnums_x86_64[0])) - "g_mpx_regnums_x86_64 has wrong number of register infos"); // Number of register sets provided by this context. -enum { k_num_extended_register_sets = 2, k_num_register_sets = 4 }; +constexpr unsigned k_num_extended_register_sets = 2, k_num_register_sets = 4; // Register sets for x86 32-bit. static const RegisterSet g_reg_sets_i386[k_num_register_sets] = { @@ -219,7 +216,6 @@ static const RegisterSet g_reg_sets_x86_64[k_num_register_sets] = { g_avx_regnums_x86_64}, { "Memory Protection Extensions", "mpx", k_num_mpx_registers_x86_64, g_mpx_regnums_x86_64}}; -} #define REG_CONTEXT_SIZE (GetRegisterInfoInterface().GetGPRSize() + sizeof(FPR)) From a817594de9269b9ac8055a2ff2a22ab824cf143d Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Mon, 21 Dec 2020 14:43:58 -0500 Subject: [PATCH 032/378] [lld-macho][nfc] Clean up tests * Migrate most of our tests to use `split-file` instead of `echo` * Remove individual `rm -f %t/libfoo.a` commands in favor of a top-level `rm -rf %t` * Remove unused `Inputs/libfunction.s` Reviewed By: #lld-macho, compnerd Differential Revision: https://reviews.llvm.org/D93604 --- lld/test/MachO/Inputs/libfunction.s | 6 --- lld/test/MachO/archive.s | 30 +++++++++---- lld/test/MachO/common-symbol-resolution.s | 3 +- lld/test/MachO/filelist.s | 22 +++++++--- lld/test/MachO/force-load.s | 22 +++++++--- lld/test/MachO/framework.s | 12 ++++-- lld/test/MachO/invalid/archive-no-index.s | 25 ++++++++--- lld/test/MachO/invalid/bad-archive-member.s | 3 +- lld/test/MachO/lto-archive.ll | 3 +- lld/test/MachO/objc.s | 4 +- lld/test/MachO/order-file.s | 1 - lld/test/MachO/resolution.s | 16 ++++--- lld/test/MachO/section-merge.s | 41 +++++++++++++++--- lld/test/MachO/stabs.s | 3 +- lld/test/MachO/subsections-section-relocs.s | 12 +++--- lld/test/MachO/subsections-symbol-relocs.s | 37 ++++++++-------- lld/test/MachO/symbol-order.s | 42 +++++++++++++++---- lld/test/MachO/weak-definition-direct-fetch.s | 22 +++++++--- .../MachO/weak-definition-indirect-fetch.s | 24 ++++++++--- lld/test/MachO/weak-definition-order.s | 21 ++++++++-- lld/test/MachO/weak-definition-over-dysym.s | 35 +++++++++++----- 21 files changed, 272 insertions(+), 112 deletions(-) delete mode 100644 lld/test/MachO/Inputs/libfunction.s 
diff --git a/lld/test/MachO/Inputs/libfunction.s b/lld/test/MachO/Inputs/libfunction.s deleted file mode 100644 index fe0b3879a41ab..0000000000000 --- a/lld/test/MachO/Inputs/libfunction.s +++ /dev/null @@ -1,6 +0,0 @@ -.section __TEXT,__text -.globl _some_function - -_some_function: - mov $1, %rax - ret diff --git a/lld/test/MachO/archive.s b/lld/test/MachO/archive.s index abde623b07f4a..2ac2d302b88dd 100644 --- a/lld/test/MachO/archive.s +++ b/lld/test/MachO/archive.s @@ -1,11 +1,10 @@ # REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o -# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o -# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o -# RUN: rm -f %t/test.a # RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o # RUN: %lld %t/main.o %t/test.a -o %t/test.out @@ -33,9 +32,24 @@ # ALL-LOAD: T _main # ALL-LOAD: T _unused -.global _main +#--- 2.s +.globl _boo +_boo: + ret + +#--- 3.s +.globl _bar +_bar: + ret + +#--- 4.s +.globl _undefined, _unused +_unused: + ret + +#--- main.s +.globl _main _main: callq _boo callq _bar - mov $0, %rax ret diff --git a/lld/test/MachO/common-symbol-resolution.s b/lld/test/MachO/common-symbol-resolution.s index 2a88ef51e4604..1dc015816da4b 100644 --- a/lld/test/MachO/common-symbol-resolution.s +++ b/lld/test/MachO/common-symbol-resolution.s @@ -1,5 +1,5 @@ # REQUIRES: x86 -# RUN: split-file %s %t +# RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/common.s -o %t/common.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-common.s -o %t/weak-common.o @@ -11,7 +11,6 @@ # RUN: %lld -lSystem -order_file %t/order -dylib %t/libfoo.o -o %t/libfoo.dylib -# RUN: rm -f %t/defined.a %t/weak-defined-and-common.a # RUN: llvm-ar rcs %t/defined.a %t/defined.o # RUN: llvm-ar rcs %t/weak-defined-and-common.a %t/weak-defined.o %t/common.o diff --git a/lld/test/MachO/filelist.s b/lld/test/MachO/filelist.s index 32be332aa507a..3d4846c4505a9 100644 --- a/lld/test/MachO/filelist.s +++ b/lld/test/MachO/filelist.s @@ -3,10 +3,10 @@ ## This test verifies that the paths in -filelist get processed in command-line ## order. 
-# RUN: mkdir -p %t -# RUN: echo ".globl _foo; .weak_definition _foo; .section __TEXT,first; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/first.o -# RUN: echo ".globl _foo; .weak_definition _foo; .section __TEXT,second; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/second.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/second.s -o %t/second.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # FIRST: __TEXT,first _foo # SECOND: __TEXT,second _foo @@ -34,7 +34,19 @@ # RUN: %lld -filelist filelist-2 -filelist filelist-1 %t/test.o -o %t/test # RUN: llvm-objdump --syms %t/test | FileCheck %s --check-prefix=SECOND -.globl _main +#--- first.s +.globl _foo +.weak_definition _foo +.section __TEXT,first +_foo: + +#--- second.s +.globl _foo +.weak_definition _foo +.section __TEXT,second +_foo: +#--- test.s +.globl _main _main: ret diff --git a/lld/test/MachO/force-load.s b/lld/test/MachO/force-load.s index ffc2019eacd97..1cedae3173bca 100644 --- a/lld/test/MachO/force-load.s +++ b/lld/test/MachO/force-load.s @@ -1,10 +1,9 @@ # REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: echo ".section __TEXT,archive; .globl _foo; .weak_definition _foo; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/archive-foo.o -# RUN: rm -f %t/foo.a +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/archive-foo.s -o %t/archive-foo.o # RUN: llvm-ar rcs %t/foo.a %t/archive-foo.o -# RUN: echo ".section __TEXT,obj; .globl _foo; .weak_definition _foo; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: %lld -force_load %t/foo.a %t/foo.o %t/test.o -o %t/test-force-load-first # FORCE-LOAD-FIRST: __TEXT,archive _foo @@ -14,6 +13,19 @@ # RUN: llvm-objdump --syms %t/test-force-load-second | FileCheck %s --check-prefix=FORCE-LOAD-SECOND # FORCE-LOAD-SECOND: __TEXT,obj _foo +#--- archive-foo.s +.section __TEXT,archive +.globl _foo +.weak_definition _foo +_foo: + +#--- foo.s +.section __TEXT,obj +.globl _foo +.weak_definition _foo +_foo: + +#--- test.s .globl _main _main: ret diff --git a/lld/test/MachO/framework.s b/lld/test/MachO/framework.s index 43e1ec4fccafb..7244387a022b3 100644 --- a/lld/test/MachO/framework.s +++ b/lld/test/MachO/framework.s @@ -1,13 +1,13 @@ # REQUIRES: x86, shell -# RUN: mkdir -p %t -# RUN: echo ".globl _foo; _foo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o # RUN: mkdir -p %t/Foo.framework/Versions/A # RUN: %lld -dylib -install_name %t/Foo.framework/Versions/A/Foo %t/foo.o -o %t/Foo.framework/Versions/A/Foo # RUN: %lld -dylib -install_name %t/Foo.framework/Versions/A/Foobar %t/foo.o -o %t/Foo.framework/Versions/A/Foobar # RUN: ln -sf %t/Foo.framework/Versions/A %t/Foo.framework/Versions/Current # RUN: ln -sf %t/Foo.framework/Versions/Current/Foo %t/Foo.framework/Foo -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/test.o %s +# RUN: llvm-mc -filetype=obj 
-triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: %lld -lSystem -F%t -framework Foo %t/test.o -o %t/test # RUN: llvm-objdump --macho --lazy-bind %t/test | FileCheck %s --check-prefix=NOSUFFIX # NOSUFFIX: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} {{.*}}Foo _foo @@ -19,6 +19,12 @@ # RUN: llvm-objdump --macho --lazy-bind %t/test-suffix | FileCheck %s --check-prefix=SUFFIX # SUFFIX: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} {{.*}}Foobar _foo +#--- foo.s +.globl _foo +_foo: + ret + +#--- test.s .globl _main .text _main: diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s index d4d2afb449b7b..9cda945652500 100644 --- a/lld/test/MachO/invalid/archive-no-index.s +++ b/lld/test/MachO/invalid/archive-no-index.s @@ -1,16 +1,31 @@ # REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o -# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o -# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o -# RUN: rm -f %t/test.a # RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o # RUN: not %lld %t/test.o %t/test.a -o /dev/null 2>&1 | FileCheck %s # CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one +#--- 2.s +.globl _boo +_boo: + ret + +#--- 3.s +.globl _bar +_bar: + ret + +#--- 4.s +.globl _undefined, _unused +_unused: + ret + +#--- main.s .global _main _main: mov $0, %rax diff --git a/lld/test/MachO/invalid/bad-archive-member.s b/lld/test/MachO/invalid/bad-archive-member.s index 15ab524476d32..a76cecf239b7a 100644 --- a/lld/test/MachO/invalid/bad-archive-member.s +++ b/lld/test/MachO/invalid/bad-archive-member.s @@ -1,9 +1,8 @@ # REQUIRES: x86 -# RUN: split-file %s %t +# RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: %lld -dylib -lSystem %t/foo.o -o %t/foo.dylib -# RUN: rm -f %t/foo.a # RUN: llvm-ar rcs %t/foo.a %t/foo.dylib # RUN: not %lld %t/test.o %t/foo.a -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/foo.a # CHECK: error: [[FILE]]: archive member foo.dylib has unhandled file type diff --git a/lld/test/MachO/lto-archive.ll b/lld/test/MachO/lto-archive.ll index 42b2c0fb5759f..2aa7beb0c9511 100644 --- a/lld/test/MachO/lto-archive.ll +++ b/lld/test/MachO/lto-archive.ll @@ -1,8 +1,7 @@ ; REQUIRES: x86 -; RUN: split-file %s %t +; RUN: rm -rf %t; split-file %s %t ; RUN: llvm-as %t/foo.ll -o %t/foo.o ; RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o -; RUN: rm -f %t/foo.a ; RUN: llvm-ar rcs %t/foo.a %t/foo.o ; RUN: %lld -save-temps -lSystem %t/test.o %t/foo.a -o %t/test ; RUN: llvm-objdump -d --macho --no-show-raw-insn %t/test | FileCheck %s diff --git a/lld/test/MachO/objc.s b/lld/test/MachO/objc.s index 48ca60bc03225..53dd12e8f1909 100644 --- a/lld/test/MachO/objc.s +++ b/lld/test/MachO/objc.s @@ -1,12 +1,10 @@ # REQUIRES: x86 -# RUN: split-file %s %t +# RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin 
%t/has-objc-symbol.s -o %t/has-objc-symbol.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-objc-category.s -o %t/has-objc-category.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-swift.s -o %t/has-swift.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-objc.s -o %t/no-objc.o - -# RUN: rm -f %t/libHasSomeObjC.a # RUN: llvm-ar rcs %t/libHasSomeObjC.a %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/no-objc.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o diff --git a/lld/test/MachO/order-file.s b/lld/test/MachO/order-file.s index 83e6840c97861..c185572c23891 100644 --- a/lld/test/MachO/order-file.s +++ b/lld/test/MachO/order-file.s @@ -2,7 +2,6 @@ # RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o -# RUN: rm -f %t/foo.a # RUN: llvm-ar rcs %t/foo.a %t/foo.o # FOO-FIRST: <_bar>: diff --git a/lld/test/MachO/resolution.s b/lld/test/MachO/resolution.s index 056324bddbde3..da6ed086548fd 100644 --- a/lld/test/MachO/resolution.s +++ b/lld/test/MachO/resolution.s @@ -1,18 +1,17 @@ # REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: echo '.globl _foo, _bar, _baz; _foo: _bar: _baz:' | \ -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/libresolution.o +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libresolution.s -o %t/libresolution.o # RUN: %lld -dylib -install_name \ # RUN: @executable_path/libresolution.dylib %t/libresolution.o -o %t/libresolution.dylib # RUN: %lld -dylib -install_name \ # RUN: @executable_path/libresolution2.dylib %t/libresolution.o -o %t/libresolution2.dylib -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/resolution.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/resolution.s -o %t/resolution.o ## Check that we select the symbol defined in the first dylib passed on the ## command line. # RUN: %lld -o %t/dylib-first -L%t -lresolution -lresolution2 %t/resolution.o # RUN: llvm-objdump --macho --bind %t/dylib-first | FileCheck %s --check-prefix=DYLIB-FIRST -# DYLIB-FIRST: libresolution _foo +# DYLIB-FIRST: libresolution _foo # RUN: %lld -o %t/dylib2-first -L%t -lresolution2 -lresolution %t/resolution.o # RUN: llvm-objdump --macho --bind %t/dylib2-first | FileCheck %s --check-prefix=DYLIB2-FIRST @@ -31,6 +30,13 @@ # OBJ-FIRST-NOT: libresolution _bar # OBJ-FIRST-NOT: libresolution _baz +#--- libresolution.s +.globl _foo, _bar, _baz +_foo: +_bar: +_baz: + +#--- resolution.s .globl _main, _bar # Global defined symbol _bar: diff --git a/lld/test/MachO/section-merge.s b/lld/test/MachO/section-merge.s index 7db41573d675e..4dc657a19b5c2 100644 --- a/lld/test/MachO/section-merge.s +++ b/lld/test/MachO/section-merge.s @@ -1,10 +1,10 @@ # REQUIRES: x86 -# RUN: mkdir -p %t +# RUN: rm -rf %t; split-file %s %t ## Verify that we preserve alignment when merging sections. 
-# RUN: echo ".globl _foo; .data; .p2align 0; _foo: .byte 0xca" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o -# RUN: echo ".globl _bar; .data; .p2align 2; _bar: .byte 0xfe" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/bar.o -# RUN: echo ".globl _baz; .data; .p2align 3; _baz: .byte 0xba" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/baz.o -# RUN: echo ".globl _qux; .data; .p2align 0; _qux: .quad 0xdeadbeef" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/qux.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/bar.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/baz.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/qux.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o # RUN: %lld -o %t/output %t/foo.o %t/bar.o %t/baz.o %t/qux.o %t/main.o @@ -18,8 +18,37 @@ # CHECK: Contents of section __DATA,__data: # CHECK-NEXT: {{0*}}[[#ADDR]] ca000000 fe000000 baefbead de000000 +#--- foo.s +.globl _foo +.data +.p2align 0 +_foo: + .byte 0xca + +#--- bar.s +.globl _bar +.data +.p2align 2 +_bar: + .byte 0xfe + +#--- baz.s +.globl _baz +.data +.p2align 3 +_baz: + .byte 0xba + +#--- qux.s +.globl _qux +.data +.p2align 0 +_qux: + .quad 0xdeadbeef + +#--- main.s .section __TEXT,__text -.global _main +.globl _main _main: mov $0, %rax diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s index 9634a95be4a29..ad9dacc62804e 100644 --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -1,13 +1,12 @@ # REQUIRES: x86, shell # UNSUPPORTED: system-windows -# RUN: split-file %s %t +# RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-debug.s -o %t/no-debug.o ## Set modtimes of the files for deterministic test output. 
# RUN: env TZ=UTC touch -t "197001010000.16" %t/test.o # RUN: env TZ=UTC touch -t "197001010000.32" %t/foo.o -# RUN: rm -f %t/foo.a # RUN: llvm-ar rcsU %t/foo.a %t/foo.o # RUN: %lld -lSystem %t/test.o %t/foo.o %t/no-debug.o -o %t/test diff --git a/lld/test/MachO/subsections-section-relocs.s b/lld/test/MachO/subsections-section-relocs.s index 7b7ffe20e7baf..44ed5e56041d8 100644 --- a/lld/test/MachO/subsections-section-relocs.s +++ b/lld/test/MachO/subsections-section-relocs.s @@ -1,9 +1,6 @@ # REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o - -# RUN: echo "_bar_str" > %t/order-file -# RUN: echo "_foo_str" >> %t/order-file +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: %lld -o %t/test %t/test.o -order_file %t/order-file # RUN: llvm-objdump --section-headers -d --no-show-raw-insn %t/test | FileCheck %s @@ -20,6 +17,11 @@ # STRINGS: Private symbol # STRINGS: foo +#--- order-file +_bar_str +_foo_str + +#--- test.s .text .globl _main, _foo_str, _bar_str diff --git a/lld/test/MachO/subsections-symbol-relocs.s b/lld/test/MachO/subsections-symbol-relocs.s index 8010a50e7444b..848ac756361b4 100644 --- a/lld/test/MachO/subsections-symbol-relocs.s +++ b/lld/test/MachO/subsections-symbol-relocs.s @@ -1,21 +1,6 @@ # REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o - -# RUN: echo "_bar" > %t/order-file-1 -# RUN: echo "_foo" >> %t/order-file-1 -# RUN: echo "_main" >> %t/order-file-1 -## _qux is marked as .alt_entry, so it should not create a new subsection and -## its contents should move with _bar to the start of the output despite the -## order file listing it at the end. -# RUN: echo "_qux" >> %t/order-file-1 - -## _bar and _baz point to the same address, so both order files should achieve -## the same result. -# RUN: echo "_baz" > %t/order-file-2 -# RUN: echo "_foo" >> %t/order-file-2 -# RUN: echo "_main" >> %t/order-file-2 -# RUN: echo "_qux" >> %t/order-file-2 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: %lld -o %t/test-1 %t/test.o -order_file %t/order-file-1 # RUN: llvm-objdump -d --no-show-raw-insn %t/test-1 | FileCheck %s @@ -34,6 +19,24 @@ # CHECK-NEXT: movq $0, %rax # CHECK-NEXT: retq +#--- order-file-1 +_bar +_foo +_main +## _qux is marked as .alt_entry, so it should not create a new subsection and +## its contents should move with _bar to the start of the output despite the +## order file listing it at the end. +_qux + +#--- order-file-2 +## _bar and _baz point to the same address, so both order files should achieve +## the same result. 
+_baz +_foo +_main +_qux + +#--- test.s .text .globl _main, _foo, _bar, _qux .alt_entry _qux diff --git a/lld/test/MachO/symbol-order.s b/lld/test/MachO/symbol-order.s index c61e7c830f6b5..0c904560e5244 100644 --- a/lld/test/MachO/symbol-order.s +++ b/lld/test/MachO/symbol-order.s @@ -1,16 +1,13 @@ # REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: echo ".global f, g; .section __TEXT,test_g; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/g.o -# RUN: echo ".global f; .section __TEXT,test_f1; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f1.o -# RUN: echo ".global f; .section __TEXT,test_f2; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f2.o -# RUN: echo ".global f, g; .section __TEXT,test_fg; f: ret; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/fg.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/g.s -o %t/g.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/f1.s -o %t/f1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/f2.s -o %t/f2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/fg.s -o %t/fg.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: %lld -dylib -o %t/libf1.dylib %t/f1.o -lSystem -# RUN: rm -f %t/libf2_g.a # RUN: llvm-ar rcs %t/libf2_g.a %t/f2.o %t/g.o - -# RUN: rm -f %t/libfg.a # RUN: llvm-ar rcs %t/libfg.a %t/fg.o # RUN: %lld %t/libf1.dylib %t/libf2_g.a %t/test.o -o %t/test.out -lSystem @@ -39,6 +36,33 @@ # ARCHIVE-PRIORITY-NEXT: segment section address dylib symbol # ARCHIVE-PRIORITY-EMPTY: +#--- g.s +.global f, g +.section __TEXT,test_g +g: + callq f + +#--- f1.s +.global f +.section __TEXT,test_f1 +f: + ret + +#--- f2.s +.global f +.section __TEXT,test_f2 +f: + ret + +#--- fg.s +.global f, g +.section __TEXT,test_fg +f: + ret +g: + callq f + +#--- test.s .global g .global _main _main: diff --git a/lld/test/MachO/weak-definition-direct-fetch.s b/lld/test/MachO/weak-definition-direct-fetch.s index 38b809c964817..28bbe65f05f10 100644 --- a/lld/test/MachO/weak-definition-direct-fetch.s +++ b/lld/test/MachO/weak-definition-direct-fetch.s @@ -1,12 +1,12 @@ # REQUIRES: x86 -# RUN: mkdir -p %t +# RUN: rm -rf %t; split-file %s %t ## This test exercises the various possible combinations of weak and non-weak ## symbols that get referenced directly by a relocation in an object file. -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o -# RUN: echo ".globl _foo; .section __TEXT,nonweak; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o -# RUN: echo ".globl _foo; .weak_definition _foo; .section __TEXT,weak; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/weakfoo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o # RUN: %lld -dylib -install_name \ # RUN: @executable_path/libfoo.dylib %t/foo.o -o %t/libfoo.dylib @@ -24,9 +24,7 @@ # RUN: llvm-nm %t/libweakfoo.dylib 2>&1 | FileCheck %s --check-prefix=NOSYM # NOSYM: no symbols -# RUN: rm -f %t/foo.a # RUN: llvm-ar --format=darwin rcs %t/foo.a %t/foo.o -# RUN: rm -f %t/weakfoo.a # RUN: llvm-ar --format=darwin rcs %t/weakfoo.a %t/weakfoo.o ## End of input file setup. 
The following lines check which symbol "wins" when @@ -84,6 +82,18 @@ # RUN: %lld -lSystem -o %t/nonweak-ar-weak-obj -L%t %t/foo.a %t/weakfoo.o %t/test.o # RUN: llvm-objdump --macho --lazy-bind --syms %t/nonweak-ar-weak-obj | FileCheck %s --check-prefix=PREFER-WEAK-OBJECT +#--- foo.s +.globl _foo +.section __TEXT,nonweak +_foo: + +#--- weakfoo.s +.globl _foo +.weak_definition _foo +.section __TEXT,weak +_foo: + +#--- test.s .globl _main _main: callq _foo diff --git a/lld/test/MachO/weak-definition-indirect-fetch.s b/lld/test/MachO/weak-definition-indirect-fetch.s index 3a45852115e01..46ed62441b89a 100644 --- a/lld/test/MachO/weak-definition-indirect-fetch.s +++ b/lld/test/MachO/weak-definition-indirect-fetch.s @@ -1,5 +1,5 @@ # REQUIRES: x86 -# RUN: mkdir -p %t +# RUN: rm -rf %t; split-file %s %t ## This tests examines the effect of .weak_definition on symbols in an archive ## that are not referenced directly, but which are still loaded due to some @@ -9,13 +9,11 @@ ## will be fetched when linking against the main test file due to its references ## to _bar and _baz. -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o -# RUN: echo ".globl _foo, _bar; .section __TEXT,nonweak; _bar: _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o -# RUN: echo ".globl _foo, _baz; .weak_definition _foo; .section __TEXT,weak; _baz: _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/weakfoo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o -# RUN: rm -f %t/foo.a # RUN: llvm-ar --format=darwin rcs %t/foo.a %t/foo.o -# RUN: rm -f %t/weakfoo.a # RUN: llvm-ar --format=darwin rcs %t/weakfoo.a %t/weakfoo.o # PREFER-NONWEAK-OBJECT: O __TEXT,nonweak _foo @@ -35,6 +33,20 @@ # RUN: %lld -lSystem -o %t/nonweak-ar-weak-obj -L%t %t/foo.a %t/weakfoo.o %t/test.o # RUN: llvm-objdump --syms %t/nonweak-ar-weak-obj | FileCheck %s --check-prefix=PREFER-NONWEAK-OBJECT +#--- foo.s +.globl _foo, _bar +.section __TEXT,nonweak +_bar: +_foo: + +#--- weakfoo.s +.globl _foo, _baz +.weak_definition _foo +.section __TEXT,weak +_baz: +_foo: + +#--- test.s .globl _main _main: callq _bar diff --git a/lld/test/MachO/weak-definition-order.s b/lld/test/MachO/weak-definition-order.s index 01875e82d5641..addf6ab8f8ec5 100644 --- a/lld/test/MachO/weak-definition-order.s +++ b/lld/test/MachO/weak-definition-order.s @@ -1,13 +1,13 @@ # REQUIRES: x86 -# RUN: mkdir -p %t +# RUN: rm -rf %t; split-file %s %t ## This test demonstrates that when we have two weak symbols of the same type, ## we pick the one whose containing file appears earlier in the command-line ## invocation. 
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o -# RUN: echo ".globl _foo; .weak_definition _foo; .section __TEXT,weak1; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/weak1.o -# RUN: echo ".globl _foo; .weak_definition _foo; .section __TEXT,weak2; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/weak2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak1.s -o %t/weak1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak2.s -o %t/weak2.o # RUN: %lld -lSystem -o %t/obj12 -L%t %t/weak1.o %t/weak2.o %t/test.o # RUN: llvm-objdump --syms %t/obj12 | FileCheck %s --check-prefix=WEAK1 @@ -29,6 +29,19 @@ # DYLIB1: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} pointer 0 libweak1 _foo # DYLIB2: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} pointer 0 libweak2 _foo +#--- weak1.s +.globl _foo +.weak_definition _foo +.section __TEXT,weak1; +_foo: + +#--- weak2.s +.globl _foo +.weak_definition _foo +.section __TEXT,weak2 +_foo: + +#--- test.s .globl _main _main: callq _foo diff --git a/lld/test/MachO/weak-definition-over-dysym.s b/lld/test/MachO/weak-definition-over-dysym.s index ec1f3353c7c38..ff677a9d4a3f5 100644 --- a/lld/test/MachO/weak-definition-over-dysym.s +++ b/lld/test/MachO/weak-definition-over-dysym.s @@ -1,22 +1,19 @@ # REQUIRES: x86 -# RUN: mkdir -p %t +# RUN: rm -rf %t; split-file %s %t ## This test demonstrates that when an archive file is fetched, its symbols ## always override any conflicting dylib symbols, regardless of any weak ## definition flags. -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o -# RUN: echo ".globl _foo; _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/libfoo.o -# RUN: %lld -dylib -install_name \ -# RUN: @executable_path/libfoo.dylib %t/libfoo.o -o %t/libfoo.dylib +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: %lld -dylib -install_name @executable_path/libfoo.dylib %t/libfoo.o -o %t/libfoo.dylib -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o -# RUN: echo ".globl _foo, _bar; .section __TEXT,nonweak; _bar: _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o -# RUN: echo ".globl _foo, _bar; .weak_definition _foo; .section __TEXT,weak; _bar: _foo:" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/weakfoo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o -# RUN: rm -f %t/foo.a # RUN: llvm-ar --format=darwin rcs %t/foo.a %t/foo.o -# RUN: rm -f %t/weakfoo.a # RUN: llvm-ar --format=darwin rcs %t/weakfoo.a %t/weakfoo.o # PREFER-WEAK-OBJECT: O __TEXT,weak _foo @@ -32,6 +29,24 @@ # RUN: %lld -lSystem -o %t/nonweak-dylib-weak-obj -L%t -lfoo %t/weakfoo.o %t/test.o # RUN: llvm-objdump --macho --lazy-bind --syms %t/nonweak-dylib-weak-obj | FileCheck %s --check-prefix=PREFER-WEAK-OBJECT +#--- libfoo.s +.globl _foo +_foo: + +#--- foo.s +.globl _foo, _bar +.section __TEXT,nonweak +_bar: +_foo: + +#--- weakfoo.s +.globl _foo, _bar +.weak_definition _foo +.section __TEXT,weak +_bar: +_foo: + +#--- test.s .globl _main _main: callq _foo From 0f8224c2104b7246b36b9f92ffe87aad4d8dd3ac Mon Sep 17 00:00:00 2001 From: Jez Ng 
Date: Mon, 21 Dec 2020 14:44:01 -0500 Subject: [PATCH 033/378] [lld-macho][nfc] Remove %T from headerpad.s The llvm-lit docs indicate that it is deprecated. Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D93605 --- lld/test/MachO/headerpad.s | 46 ++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/lld/test/MachO/headerpad.s b/lld/test/MachO/headerpad.s index 0f4f19ce9d62a..750919438cb07 100644 --- a/lld/test/MachO/headerpad.s +++ b/lld/test/MachO/headerpad.s @@ -8,10 +8,12 @@ ## just enforces a lower bound. We should consider implementing the same ## alignment behavior. +# RUN: rm -rf %t; mkdir -p %t + ################ Check default behavior -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: %lld -o %t %t.o -# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PADx +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: %lld -o %t/test %t/test.o +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=PADx # # PADx: magic {{.+}} ncmds sizeofcmds flags # PADx-NEXT: MH_MAGIC_64 {{.+}} [[#]] [[#%u, CMDSIZE:]] {{.*}} @@ -22,10 +24,10 @@ # PADx-NEXT: offset [[#%u, CMDSIZE + 0x20 + 0x20]] ################ Zero pad, no LCDylibs -# RUN: %lld -o %t %t.o -headerpad 0 -# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD0 -# RUN: %lld -o %t %t.o -headerpad 0 -headerpad_max_install_names -# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD0 +# RUN: %lld -o %t/test %t/test.o -headerpad 0 +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=PAD0 +# RUN: %lld -o %t/test %t/test.o -headerpad 0 -headerpad_max_install_names +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=PAD0 # # PAD0: magic {{.+}} ncmds sizeofcmds flags # PAD0-NEXT: MH_MAGIC_64 {{.+}} [[#]] [[#%u, CMDSIZE:]] {{.*}} @@ -36,12 +38,12 @@ # PAD0-NEXT: offset [[#%u, CMDSIZE + 0x20 + 0]] ################ Each lexical form of a hex number, no LCDylibs -# RUN: %lld -o %t %t.o -headerpad 11 -# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD11 -# RUN: %lld -o %t %t.o -headerpad 0x11 -# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD11 -# RUN: %lld -o %t %t.o -headerpad 0X11 -headerpad_max_install_names -# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD11 +# RUN: %lld -o %t/test %t/test.o -headerpad 11 +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=PAD11 +# RUN: %lld -o %t/test %t/test.o -headerpad 0x11 +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=PAD11 +# RUN: %lld -o %t/test %t/test.o -headerpad 0X11 -headerpad_max_install_names +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=PAD11 # # PAD11: magic {{.+}} ncmds sizeofcmds flags # PAD11-NEXT: MH_MAGIC_64 {{.+}} [[#]] [[#%u, CMDSIZE:]] {{.*}} @@ -52,17 +54,17 @@ # PAD11-NEXT: offset [[#%u, CMDSIZE + 0x20 + 0x11]] ################ Each & all 3 kinds of LCDylib -# RUN: echo "" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %T/null.o -# RUN: %lld -o %T/libnull.dylib %T/null.o -dylib \ +# RUN: echo "" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/null.o +# RUN: %lld -o %t/libnull.dylib %t/null.o -dylib \ # RUN: -headerpad_max_install_names -# RUN: llvm-objdump --macho --all-headers %T/libnull.dylib | 
FileCheck %s --check-prefix=PADMAX -# RUN: %lld -o %T/libnull.dylib %T/null.o -dylib \ +# RUN: llvm-objdump --macho --all-headers %t/libnull.dylib | FileCheck %s --check-prefix=PADMAX +# RUN: %lld -o %t/libnull.dylib %t/null.o -dylib \ # RUN: -headerpad_max_install_names -lSystem -# RUN: llvm-objdump --macho --all-headers %T/libnull.dylib | FileCheck %s --check-prefix=PADMAX -# RUN: %lld -o %T/libnull.dylib %T/null.o -dylib \ +# RUN: llvm-objdump --macho --all-headers %t/libnull.dylib | FileCheck %s --check-prefix=PADMAX +# RUN: %lld -o %t/libnull.dylib %t/null.o -dylib \ # RUN: -headerpad_max_install_names \ # RUN: -lSystem -sub_library libSystem -# RUN: llvm-objdump --macho --all-headers %T/libnull.dylib | FileCheck %s --check-prefix=PADMAX +# RUN: llvm-objdump --macho --all-headers %t/libnull.dylib | FileCheck %s --check-prefix=PADMAX # # PADMAX: magic {{.+}} ncmds sizeofcmds flags # PADMAX-NEXT: MH_MAGIC_64 {{.+}} [[#%u, N:]] [[#%u, CMDSIZE:]] {{.*}} @@ -73,10 +75,10 @@ # PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 7)]] ################ All 3 kinds of LCDylib swamped by a larger override -# RUN: %lld -o %T/libnull.dylib %T/null.o -dylib \ +# RUN: %lld -o %t/libnull.dylib %t/null.o -dylib \ # RUN: -headerpad_max_install_names -headerpad 0x1001 \ # RUN: -lSystem -sub_library libSystem -# RUN: llvm-objdump --macho --all-headers %T/libnull.dylib | FileCheck %s --check-prefix=PADOVR +# RUN: llvm-objdump --macho --all-headers %t/libnull.dylib | FileCheck %s --check-prefix=PADOVR # # PADOVR: magic {{.+}} ncmds sizeofcmds flags # PADOVR-NEXT: MH_MAGIC_64 {{.+}} [[#%u, N:]] [[#%u, CMDSIZE:]] {{.*}} From 8f933a4e931dd1a66f19a81b33399cf7b407308f Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Mon, 21 Dec 2020 15:06:55 -0500 Subject: [PATCH 034/378] [openacc] Use TableGen enum for default clause value Use the TableGen feature to have enum values for clauses. Next step will be to extend the MLIR part used currently by OpenMP to use the same enum on the dialect side. This patch also add function that convert the enum to StringRef to be used on the dump-parse-tree from flang. 
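For illustration only (a hedged sketch, not verbatim TableGen output): based on the DirectiveEmitter change and the `llvm/test/TableGen/directive1.td` expectations below, the helper emitted for the OpenACC default clause is expected to look roughly like the following. The exact directive-language name in the `llvm_unreachable` message depends on how ACC.td names the language.

```cpp
// Sketch of the generated enum-to-string helper consumed by flang's
// dump-parse-tree; the real code is produced by the TableGen DirectiveEmitter.
llvm::StringRef llvm::acc::getOpenACCDefaultValueName(llvm::acc::DefaultValue x) {
  switch (x) {
  case ACC_Default_none:
    return "none";
  case ACC_Default_present:
    return "present";
  }
  llvm_unreachable("Invalid OpenACC DefaultValue kind");
}
```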
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93576 --- flang/include/flang/Parser/dump-parse-tree.h | 6 +++++- flang/include/flang/Parser/parse-tree.h | 3 +-- flang/lib/Parser/openacc-parsers.cpp | 6 +++--- flang/lib/Parser/unparse.cpp | 4 ++-- flang/lib/Semantics/resolve-directives.cpp | 4 ++-- llvm/include/llvm/Frontend/OpenACC/ACC.td | 8 +++++++ llvm/test/TableGen/directive1.td | 13 ++++++++++++ llvm/utils/TableGen/DirectiveEmitter.cpp | 22 ++++++++++++++++++++ 8 files changed, 56 insertions(+), 10 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 7e2d713c127fe..f69dd149e0a3a 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -68,7 +68,11 @@ class ParseTreeDumper { #include "llvm/Frontend/OpenACC/ACC.cpp.inc" NODE(parser, AccBindClause) NODE(parser, AccDefaultClause) - NODE_ENUM(parser::AccDefaultClause, Arg) + static std::string GetNodeName(const llvm::acc::DefaultValue &x) { + return llvm::Twine( + "llvm::acc::DefaultValue = ", llvm::acc::getOpenACCDefaultValueName(x)) + .str(); + } NODE(parser, AccClauseList) NODE(parser, AccCombinedDirective) NODE(parser, AccDataModifier) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index b9d88f6c66f8b..59fa278e00296 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3859,8 +3859,7 @@ struct AccBindClause { }; struct AccDefaultClause { - ENUM_CLASS(Arg, None, Present) - WRAPPER_CLASS_BOILERPLATE(AccDefaultClause, Arg); + WRAPPER_CLASS_BOILERPLATE(AccDefaultClause, llvm::acc::DefaultValue); CharBlock source; }; diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp index 2447ed70b1a19..26cacc9135c8b 100644 --- a/flang/lib/Parser/openacc-parsers.cpp +++ b/flang/lib/Parser/openacc-parsers.cpp @@ -171,9 +171,9 @@ TYPE_PARSER(sourced(construct(parenthesized(name))) || sourced(construct(parenthesized(scalarDefaultCharExpr)))) // 2.5.14 Default clause -TYPE_PARSER(construct( - parenthesized(first("NONE" >> pure(AccDefaultClause::Arg::None), - "PRESENT" >> pure(AccDefaultClause::Arg::Present))))) +TYPE_PARSER(construct(parenthesized( + first("NONE" >> pure(llvm::acc::DefaultValue::ACC_Default_none), + "PRESENT" >> pure(llvm::acc::DefaultValue::ACC_Default_present))))) // SELF clause is either a simple optional condition for compute construct // or a synonym of the HOST clause for the update directive 2.14.4 holding diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index b42b09ae723b0..a027c8fc9af69 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1850,10 +1850,10 @@ class UnparseVisitor { } void Unparse(const AccDefaultClause &x) { switch (x.v) { - case AccDefaultClause::Arg::None: + case llvm::acc::DefaultValue::ACC_Default_none: Put("NONE"); break; - case AccDefaultClause::Arg::Present: + case llvm::acc::DefaultValue::ACC_Default_present: Put("PRESENT"); break; } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 7ca42bac569af..a4297ab425404 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -638,10 +638,10 @@ void AccAttributeVisitor::PrivatizeAssociatedLoopIndex( void AccAttributeVisitor::Post(const parser::AccDefaultClause &x) { if (!dirContext_.empty()) { switch (x.v) { - case 
parser::AccDefaultClause::Arg::Present: + case llvm::acc::DefaultValue::ACC_Default_present: SetContextDefaultDSA(Symbol::Flag::AccPresent); break; - case parser::AccDefaultClause::Arg::None: + case llvm::acc::DefaultValue::ACC_Default_none: SetContextDefaultDSA(Symbol::Flag::AccNone); break; } diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td index d5998845a8390..d53d3132c9694 100644 --- a/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -80,8 +80,16 @@ def ACCC_Create : Clause<"create"> { } // 2.5.15 +def ACC_Default_none : ClauseVal<"none", 1, 0> { let isDefault = 1; } +def ACC_Default_present : ClauseVal<"present", 0, 0> {} + def ACCC_Default : Clause<"default"> { let flangClassValue = "AccDefaultClause"; + let enumClauseValue = "DefaultValue"; + let allowedClauseValues = [ + ACC_Default_present, + ACC_Default_none + ]; } // 2.14.3 diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 5d9be74e9fce5..a63b4065f86aa 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -95,6 +95,7 @@ def TDL_DirA : Directive<"dira"> { // CHECK-NEXT: bool isAllowedClauseForDirective(Directive D, Clause C, unsigned Version); // CHECK-EMPTY: // CHECK-NEXT: AKind getAKind(StringRef); +// CHECK-NEXT: llvm::StringRef getTdlAKindName(AKind); // CHECK-EMPTY: // CHECK-NEXT: } // namespace tdl // CHECK-NEXT: } // namespace llvm @@ -147,6 +148,18 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: .Default(TDLCV_valc); // IMPL-NEXT: } // IMPL-EMPTY: +// IMPL-NEXT: llvm::StringRef llvm::tdl::getTdlAKindName(llvm::tdl::AKind x) { +// IMPL-NEXT: switch (x) { +// IMPL-NEXT: case TDLCV_vala: +// IMPL-NEXT: return "vala"; +// IMPL-NEXT: case TDLCV_valb: +// IMPL-NEXT: return "valb"; +// IMPL-NEXT: case TDLCV_valc: +// IMPL-NEXT: return "valc"; +// IMPL-NEXT: } +// IMPL-NEXT: llvm_unreachable("Invalid Tdl AKind kind"); +// IMPL-NEXT: } +// IMPL-EMPTY: // IMPL-NEXT: bool llvm::tdl::isAllowedClauseForDirective(Directive D, Clause C, unsigned Version) { // IMPL-NEXT: assert(unsigned(D) <= llvm::tdl::Directive_enumSize); // IMPL-NEXT: assert(unsigned(C) <= llvm::tdl::Clause_enumSize); diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index ff6050b07f52b..6dee193899968 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -107,6 +107,12 @@ void GenerateEnumClauseVal(const std::vector &Records, EnumHelperFuncs += (llvm::Twine(EnumName) + llvm::Twine(" get") + llvm::Twine(EnumName) + llvm::Twine("(StringRef);\n")) .str(); + + EnumHelperFuncs += + (llvm::Twine("llvm::StringRef get") + llvm::Twine(DirLang.getName()) + + llvm::Twine(EnumName) + llvm::Twine("Name(") + + llvm::Twine(EnumName) + llvm::Twine(");\n")) + .str(); } } } @@ -336,6 +342,22 @@ void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang, } OS << " .Default(" << DefaultName << ");\n"; OS << "}\n"; + + OS << "\n"; + OS << "llvm::StringRef llvm::" << DirLang.getCppNamespace() << "::get" + << DirLang.getName() << EnumName + << "Name(llvm::" << DirLang.getCppNamespace() << "::" << EnumName + << " x) {\n"; + OS << " switch (x) {\n"; + for (const auto &CV : ClauseVals) { + ClauseVal CVal{CV}; + OS << " case " << CV->getName() << ":\n"; + OS << " return \"" << CVal.getFormattedName() << "\";\n"; + } + OS << " }\n"; // switch + OS << " llvm_unreachable(\"Invalid " << DirLang.getName() << " " + << EnumName << " 
kind\");\n"; + OS << "}\n"; } } From b7ae1d3d2b1b1d73374a0583150c452273318268 Mon Sep 17 00:00:00 2001 From: nicolasvasilache Date: Mon, 21 Dec 2020 10:03:12 -0800 Subject: [PATCH 035/378] [mlir][Linalg] Revisit the Linalg on tensors abstraction This revision drops init_tensor arguments from Linalg on tensors and instead uniformizes the output buffers and output tensors to be consistent. This significantly simplifies the usage of Linalg on tensors and is a stepping stone for its evolution towards a mixed tensor and shape abstraction discussed in https://llvm.discourse.group/t/linalg-and-shapes/2421/19. Differential Revision: https://reviews.llvm.org/D93469 --- mlir/docs/Dialects/Linalg.md | 110 +-- .../Linalg/Analysis/DependenceAnalysis.h | 14 +- .../mlir/Dialect/Linalg/EDSC/Builders.h | 15 +- .../mlir/Dialect/Linalg/EDSC/Intrinsics.h | 1 + .../mlir/Dialect/Linalg/IR/LinalgOps.h | 9 +- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 122 ++-- .../Linalg/IR/LinalgStructuredOpsInterface.td | 632 +++++++++++------- .../mlir/Dialect/Linalg/IR/LinalgTraits.h | 166 ----- mlir/include/mlir/IR/OpBase.td | 5 + .../Linalg/CPU/test-tensor-matmul.mlir | 2 +- .../Linalg/Analysis/DependenceAnalysis.cpp | 94 +-- mlir/lib/Dialect/Linalg/EDSC/Builders.cpp | 65 +- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 390 +++++------ .../Dialect/Linalg/Transforms/Bufferize.cpp | 113 ++-- .../Linalg/Transforms/DropUnitDims.cpp | 17 +- .../Linalg/Transforms/ElementwiseToLinalg.cpp | 62 +- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 50 +- .../Linalg/Transforms/FusionOnTensors.cpp | 152 ++++- .../Linalg/Transforms/Generalization.cpp | 8 +- .../Dialect/Linalg/Transforms/Interchange.cpp | 2 +- .../Dialect/Linalg/Transforms/Promotion.cpp | 11 +- .../Linalg/Transforms/Sparsification.cpp | 18 +- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 58 +- .../Dialect/Linalg/Transforms/Transforms.cpp | 17 +- .../Linalg/Transforms/Vectorization.cpp | 7 +- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 6 - mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 4 +- mlir/test/Dialect/Linalg/bufferize.mlir | 88 ++- .../Linalg/canonicalize-duplicate-inputs.mlir | 38 +- mlir/test/Dialect/Linalg/canonicalize.mlir | 9 +- .../Linalg/convert-elementwise-to-linalg.mlir | 72 +- .../Dialect/Linalg/drop-unit-extent-dims.mlir | 61 +- .../Dialect/Linalg/fold-unit-trip-loops.mlir | 21 +- mlir/test/Dialect/Linalg/fusion-tensor.mlir | 439 +++++++----- .../Dialect/Linalg/generalize-named-ops.mlir | 12 +- mlir/test/Dialect/Linalg/invalid.mlir | 137 ++-- mlir/test/Dialect/Linalg/parallel-loops.mlir | 4 +- mlir/test/Dialect/Linalg/reshape_fusion.mlir | 211 +++--- .../Linalg/reshape_linearization_fusion.mlir | 206 +++--- mlir/test/Dialect/Linalg/roundtrip.mlir | 68 +- mlir/test/Dialect/Linalg/sparse_1d.mlir | 81 ++- mlir/test/Dialect/Linalg/sparse_2d.mlir | 81 ++- mlir/test/Dialect/Linalg/sparse_3d.mlir | 101 +-- mlir/test/Dialect/Linalg/sparse_invalid.mlir | 112 +--- mlir/test/Dialect/Linalg/sparse_parallel.mlir | 12 +- mlir/test/Dialect/Linalg/sparse_storage.mlir | 5 +- .../Dialect/Linalg/tile-and-distribute.mlir | 4 +- .../Dialect/Linalg/tile-and-fuse-tensors.mlir | 8 +- mlir/test/Dialect/Linalg/tile-tensors.mlir | 6 +- mlir/test/EDSC/builder-api-test.cpp | 12 +- .../test-linalg-ods-gen.tc | 3 - .../mlir-linalg-ods-gen.cpp | 28 +- 52 files changed, 2073 insertions(+), 1896 deletions(-) delete mode 100644 mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h diff --git a/mlir/docs/Dialects/Linalg.md b/mlir/docs/Dialects/Linalg.md index 02508a81b63ae..18473f4cb7966 100644 --- 
a/mlir/docs/Dialects/Linalg.md +++ b/mlir/docs/Dialects/Linalg.md @@ -21,8 +21,8 @@ Linalg. They are all implemented in terms of the properties of the one-off op knowledge. The textual form description of these transformations is left for future work. -Still, it is useful to at least the key transformations that are performed on -the Linalg IR and that have influenced its design: +Still, it is useful to list the key transformations that are performed on the +Linalg IR and that have influenced its design: 1. Progressive Buffer Allocation. 1. Parametric Tiling. @@ -42,8 +42,25 @@ Linalg takes at least some inspiration from all previously [key transformations](#key_transformations), including lowering to scalar load/store and other operations or to external library calls and intrinsics. -These ops can have ***either tensor or buffer operands***, subject to -[conventions and limitations](#tensors_and_buffers). +These ops can have ***either tensor or buffer*** as both input and output +operands. Output tensors operands serve the purpose of providing a unifying +abstraction and give a shape to the results. Output tensors can come in 2 +flavors and are always associated with a corresponding op result: + +1. an "init tensor" output value which provides an initial value for a tensor + that is created by iteratively updating the result (also called "destructive + updates"). Such tensor is always materialized in some form. If enough fusion + occurs it may end up being materialized only as a register-level SSA value. + It is expected (but not required) that the destructive update pattern can be + rewritten as an inplace update on buffers. + +2. a "shape-only" tensor output value whose underlying elements are not used in + the payload computation and only serves the purpose of carrying shape + information to lower levels of abstraction. In the future this will be + replaced by an appropriate shape type when it is available as a builtin type + (see the discourse discussion + [Linalg and Shapes](https://llvm.discourse.group/t/linalg-and-shapes/2421) + for more details). ### Payload-Carrying Ops @@ -125,14 +142,15 @@ instance, it guarantees no out-of bounds access can occur by construction (assuming dynamic operand dimensions agree with each other, which is the purpose of the `assert` runtime check). -Before lowering to loop form, loop induction variables and iterators are *not -yet materialized*. This is a necessary property if we want an abstraction that -works on both tensor values and buffers because ***values don’t escape -loops/nesting***. +Before lowering to loop form, loop induction variables and iterators are +implicit (i.e. *not yet materialized*). -The main implications are that: 1. The semantics of the ops are *restricted to -operate on structured data types*, on which we can define an iterator. 2. This -does not model arbitrary code with side-effects. +The main implications are that: + +1. The semantics of the ops are *restricted to operate on structured data + types*, on which we can define an iterator. + +2. This does not model arbitrary code with side-effects. We do not think these are serious limitations in practice because MLIR is all about mixing different levels of abstractions in the same IR. As long as Linalg @@ -483,76 +501,6 @@ because of empirical evidence building and working on multiple high-level compilers. As we lay those down and engage more with the community, we expect multiple rounds of discussions and design changes to the original architecture. 
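As an editorial illustration of the two output-tensor flavors described earlier in this documentation change (a sketch under the assumption that the interface methods added later in this commit are available; not part of the patch): an output tensor operand whose block argument is read by the payload is an "init tensor", and any other output tensor is shape-only.

```cpp
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"

// Classify the tensor outputs of a structured op using the revised
// LinalgStructuredInterface accessors (sketch; `op` is any linalg::LinalgOp).
static void classifyOutputTensors(mlir::linalg::LinalgOp op) {
  for (mlir::OpOperand &output : op.getOutputOpOperands()) {
    if (!output.get().getType().isa<mlir::RankedTensorType>())
      continue; // Output buffers carry no associated op result.
    if (op.isInitTensor(&output)) {
      // "init tensor" flavor: the payload reads the corresponding block
      // argument, so the result is produced by destructive updates.
    } else {
      // "shape-only" flavor: only the shape of this operand is used.
    }
  }
}
```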
-### Tensors and Buffers: Conventions and Limitations - -Tensors are immutable SSA values, buffers are mutable regions of memory subject -to side-effects and aliasing. As a consequence, output buffers are passed as -operands whereas output tensors are new SSA values corresponding to op results. -Inputs can be arbitrary tensors or buffers and are always passed as operands. - -The following convention is currently in-flight and is in the process of -replacing other existing conventions. The following convention currently applies -to "named" structured ops which are auto-generated by the linalg-ods tool. - -The convention adopted is as follows: - -1. A first block of `ins` op operands hold read-only inputs of ShapedType. -2. An optional second block of `outs` op operands hold read-write output - buffers of MemRefType. -3. An optional third block of `init` operands hold initialization tensors of - RankedTensorType. Such tensors can appear when the op performs a reduction - and returns a tensor. - -Structured ops with fully parallel semantics, have empty `init`. They may either -write in-place into `outs` buffers or return new tensors. - -Structured ops with reduction semantics and output tensor(s) however have -additional restrictions: - -1. They can only return a single tensor for now. -2. They cannot have any output buffer operand (i.e. `outs` is empty). -3. They have exactly one `init` tensor of the same type as the unique output - tensor. Such an `init` tensor does not have an explicit associate indexing - map. Instead the map of the result tensor is used to signify that the `init` - and the `result` are "tied". - -Points 1. and 2. keep complexity of the representation in check by allowing only -a single result tensor, when reductions are present. - -Point 3. is related to the fact that SSA values cannot represent in-place -updates. Instead, linalg adopts a similar convention that exists in e.g. -`vector.outerproduct`: the value that is reduced into is passed as an explicit -argument and a new result of the same shape is produced. - -It is expected buffer allocation will fold this last input onto the result in a -single output buffer argument, which is why the same indexing map is required: -the last input operand is said to be "tied" to the result. - -Alternative, more complex representations, would allow for: - -1. Multiple results and `init` tensors in arbitrary orders, which could be - captured by an extra ArrayAttr of position pairs. -2. Relaxing the conditions on the indexing map equalities on the each pair and - e.g. allow implicit broadcasts of the input. - -These representations are deemed unnecessarily complex for now and are left for -future discussion. 
- -As an illustration, the syntax for a `linalg.matmul` writing into a buffer is: - -``` -linalg.matmul ins(%a, %b : memref, tensor) - outs(%c : memref) -``` - -, whereas the syntax for a `linalg.matmul` returning a new tensor is: - -``` -%d = linalg.matmul ins(%a, %b : tensor, memref) - init(%c : tensor) - -> tensor -``` - ### Data Representation: Views The current implementation uses the diff --git a/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h b/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h index 4ee5fac7f677a..9aa50c25cd794 100644 --- a/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h +++ b/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h @@ -45,19 +45,17 @@ class Aliases { class LinalgDependenceGraph { public: enum DependenceType { RAR = 0, RAW, WAR, WAW, NumTypes }; - struct LinalgOpView { - Operation *op; - unsigned operandIndex; - }; + // TODO: OpOperand tracks dependencies on buffer operands. Tensor result will + // need an extension to use OpResult. struct LinalgDependenceGraphElem { // dependentOpView may be either: // 1. src in the case of dependencesIntoGraphs. // 2. dst in the case of dependencesFromDstGraphs. - LinalgOpView dependentOpView; + OpOperand *dependentOpView; // View in the op that is used to index in the graph: // 1. src in the case of dependencesFromDstGraphs. // 2. dst in the case of dependencesIntoGraphs. - LinalgOpView indexingOpView; + OpOperand *indexingOpView; // Type of the dependence. DependenceType dependenceType; }; @@ -161,8 +159,8 @@ class LinalgDependenceGraph { // Uses std::pair to keep operations and view together and avoid usage errors // related to src/dst and producer/consumer terminology in the context of // dependences. - void addDependenceElem(DependenceType dt, LinalgOpView indexingOpView, - LinalgOpView dependentOpView); + void addDependenceElem(DependenceType dt, OpOperand *indexingOpView, + OpOperand *dependentOpView); /// Implementation detail for findCoveringxxx. SmallVector diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h index ac9ca9581f0d2..43dff8150f770 100644 --- a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h @@ -30,8 +30,8 @@ class ParallelOp; namespace edsc { inline void defaultRegionBuilder(ValueRange args) {} -/// Build a `linalg.generic` op with the specified `inputs`, `outputBuffers`, -/// `initTensors`, `resultTensorsTypes` and `region`. +/// Build a `linalg.generic` op with the specified `inputs`, `outputs`, +/// `resultTensorsTypes` and `region`. /// /// `otherValues` and `otherAttributes` may be passed and will be appended as /// operands and attributes respectively. @@ -41,15 +41,12 @@ inline void defaultRegionBuilder(ValueRange args) {} /// /// 1. `inputs` may contain StructuredIndexed that capture either buffer or /// tensor values. -/// 2. `outputsBuffers` may contain StructuredIndexed that capture buffer -/// values. -/// 3. `initTensors` contain tensor values, without indexing maps. -/// 4. `resultTensorTypes` may contain StructuredIndexed that capture return -/// tensor types. +/// 2. `outputs` may contain StructuredIndexed that capture either buffer or +/// tensor values. In the future this will be extended with ranked shape values. +/// 4. `resultTensorTypes` may contain return tensor types. 
Operation *makeGenericLinalgOp( ArrayRef iteratorTypes, ArrayRef inputs, - ArrayRef outputBuffers, ArrayRef initTensors, - ArrayRef resultTensorTypes, + ArrayRef outputs, TypeRange resultTensorTypes, function_ref regionBuilder = defaultRegionBuilder, ArrayRef otherValues = {}, ArrayRef otherAttributes = {}); diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h index d842069f65705..0b53fc7573a5a 100644 --- a/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h @@ -18,6 +18,7 @@ namespace intrinsics { using linalg_copy = OperationBuilder; using linalg_dot = OperationBuilder; using linalg_fill = OperationBuilder; +using linalg_init_tensor = ValueBuilder; using linalg_matmul = OperationBuilder; using linalg_matvec = OperationBuilder; using linalg_vecmat = OperationBuilder; diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h index 2438338a534fa..b1ac1a3b48b61 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h @@ -9,7 +9,6 @@ #ifndef MLIR_DIALECT_LINALG_LINALGOPS_H_ #define MLIR_DIALECT_LINALG_LINALGOPS_H_ -#include "mlir/Dialect/Linalg/IR/LinalgTraits.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" @@ -111,9 +110,17 @@ SmallVector concat(ArrayRef a, void getDimsOfType(Operation *op, StringRef iteratorTypeName, SmallVectorImpl &res); +namespace detail { +LogicalResult verifyStructuredOpInterface(Operation *op); +} // namespace detail } // namespace linalg } // namespace mlir +namespace mlir { +namespace linalg { +class IndexedGenericOp; +} // namespace linalg +} // namespace mlir #include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterfaces.h.inc" #define GET_OP_CLASSES diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index 66f39104d7e7c..26db4c2f6735e 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -19,26 +19,6 @@ include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td" include "mlir/Interfaces/CopyOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" -// The Linalg `NInputs` trait provides the API for ops that are known -// to have a specified number of inputs, all passed as operands. -// See Linalg/LinalgTraits.h for implementation details and usage. -class NInputs : - NativeOpTrait<"linalg::NInputs<" # !cast(n) # ">::Impl"> {} - -// The Linalg `ZeroInitTensors` trait provides the API for ops that are known -// to not have input tensor operands. -// See Linalg/LinalgTraits.h for implementation details and usage. -def ZeroInitTensors : NativeOpTrait<"linalg::ZeroInitTensors"> {} - -// The Linalg `NOutputs` trait provides the API for ops that are known -// to have a specified number of outputs, all passed as operands. -// See Linalg/LinalgTraits.h for implementation details and usage. -class NOutputs : - NativeOpTrait<"linalg::NOutputs<" # !cast(n) # ">::Impl"> {} - -def StructuredOpTraits : NativeOpTrait<"linalg::StructuredOpTraits">; -def NamedStructuredOpTrait : NativeOpTrait<"linalg::NamedStructuredOpTrait">; - // Base Tablegen class for Linalg ops. // Linalg ops that correspond to library calls operate on ShapedType as their // first operands. 
These may be optionally followed by non-view operands @@ -50,7 +30,6 @@ class LinalgStructuredBase_Op props> class LinalgStructured_Op props> : LinalgStructuredBase_Op])> { code libraryCallName = [{ std::string getLibraryCallName() { @@ -65,12 +44,7 @@ class LinalgStructured_Op props> //===----------------------------------------------------------------------===// // At the moment these are not declarative and require a bunch of C++ code. // In the future, these should be migrated to a declarative specification. -def CopyOp : LinalgStructured_Op<"copy", [ - CopyOpInterface, - NInputs<1>, - ZeroInitTensors, - NOutputs<1> - ]> { +def CopyOp : LinalgStructured_Op<"copy", [CopyOpInterface]> { let description = [{ Copies the data in the input view into the output view. @@ -137,6 +111,9 @@ def CopyOp : LinalgStructured_Op<"copy", [ }]>]; let extraClassDeclaration = libraryCallName # [{ + ValueRange inputs() { return getOperands().take_front(); } + ValueRange outputs() { return getOperands().take_back(); } + // Rank-polymorphic. // filling_value -> O(ivs) with parallel iterators. ArrayAttr iterator_types() { @@ -170,14 +147,13 @@ def CopyOp : LinalgStructured_Op<"copy", [ let hasCanonicalizer = 1; } -def FillOp : LinalgStructured_Op<"fill", [ - NInputs<0>, - ZeroInitTensors, - NOutputs<1>]> { - +def FillOp : LinalgStructured_Op<"fill", []> { let arguments = (ins AnyStridedMemRef:$output, AnyTypeOf<[AnyFloat, AnySignlessInteger, AnyVector]>:$value); let extraClassDeclaration = libraryCallName # [{ + ValueRange inputs() { return {}; } + ValueRange outputs() { return getOperands().take_front(); } + // Rank-polymorphic. // filling_value -> O(ivs) with parallel iterators. ArrayAttr iterator_types() { @@ -276,13 +252,8 @@ class PoolingBase_Op props> }]; } -def ConvOp : PoolingBase_Op<"conv", [ - NInputs<2>, - // Despite having reductions, this manually defined ConvOp may only take - // memref operands and can never have init tensors. - ZeroInitTensors, - NOutputs<1>]> { - +// Only support buffer semantics. +def ConvOp : PoolingBase_Op<"conv", []> { let description = [{ Generic n-D convolution as described in the TF documentation: https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/nn/convolution @@ -313,6 +284,9 @@ def ConvOp : PoolingBase_Op<"conv", [ OptionalAttr:$padding); let extraClassDeclaration = commonUtils # [{ + ValueRange inputs() { return getOperands().slice(0, 2); } + ValueRange outputs() { return getOperands().take_back(); } + // TODO: extend to support more than 1 dimensions and potentially grouping // too. unsigned getNumBatchDimensions() { return 1; } @@ -335,6 +309,12 @@ def ConvOp : PoolingBase_Op<"conv", [ // parallelized across; i.e. [zs] in the TF notation above whose number // match `xs` (i.e. 1 window loop per "image" dimension). // This may evolve in the future. + // Conditionally check nPar is large enough for cases of ill-formed op: + // this avoids overflows before hitting the verifier. + assert(nPar > getNumBatchDimensions() + getNumInputFeatureDimensions() && + "expected at least one window dimension (i.e. memref ranks greater " + "than 2). 
See 'func @conv_rank_limit' in " + "mlir/test/Dialect/Linalg/invalid.mlir"); unsigned nWin = nPar - getNumBatchDimensions() - getNumInputFeatureDimensions(); SmallVector iters(nPar, getParallelIteratorTypeName()); @@ -352,7 +332,8 @@ def ConvOp : PoolingBase_Op<"conv", [ ArrayAttr indexing_maps() { MLIRContext *context = getContext(); auto nWin = getNumWindowLoops(); - assert(nWin > 0 && "expected at least one window dimension"); + assert(nWin > 0 && "expected at least one window dimension (i.e. memref " + "ranks greater than 2)"); unsigned idx = 0; // In the following, AffineDimExprs are indexed in loop order: // [ b, xs, k, q, zs] @@ -394,13 +375,9 @@ def ConvOp : PoolingBase_Op<"conv", [ let hasCanonicalizer = 1; } +// Only support buffer semantics. class SingleInputPoolingBase_Op - : PoolingBase_Op, - // Despite having reductions, this manually defined ConvOp may only take - // memref operands and can never have init tensors. - ZeroInitTensors, - NOutputs<1>]> { + : PoolingBase_Op { let description = [{ A base class for single input pooling function. @@ -420,6 +397,9 @@ class SingleInputPoolingBase_Op OptionalAttr:$padding); let extraClassDeclaration = commonUtils# [{ + ValueRange inputs() { return getOperands().slice(0, 2); } + ValueRange outputs() { return getOperands().take_back(); } + ArrayAttr iterator_types() { // Outer parallel loops are always the number of output dimensions. unsigned nPar = getOutputShapedType(0).getRank(); @@ -493,11 +473,9 @@ class LinalgOperandOfRank: Type< class GenericOpBase : LinalgStructuredBase_Op, - NamedStructuredOpTrait, SingleBlockImplicitTerminator<"YieldOp">]> { let arguments = (ins Variadic:$inputs, - Variadic:$output_buffers, - Variadic:$init_tensors, + Variadic:$outputs, AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types, OptionalAttr:$doc, @@ -622,34 +600,26 @@ def GenericOp : GenericOpBase<"generic"> { ```mlir %C = linalg.generic #trait_attribute ins(%A, %B : tensor, memref) - init(%C : tensor) + outs(%C : tensor) {other-optional-attributes} {region} -> (tensor) ``` - - The `init` operand and the conventions around mixing tensors and buffers are - described in more detail in the "Tensors and Buffers: Conventions and - Limitations" section in the [Linalg Document](../docs/Linalg.md) - - Tensor values must be legalized by a buffer allocation pass before most - transformations can be applied. Such legalizations move tensor return values - into output buffer operands and updates the region arguments accordingly. 
}]; let builders = [ OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs, - "ValueRange":$outputBuffers, "ValueRange":$initTensors, - "ArrayRef":$indexingMaps, "ArrayRef":$iteratorTypes, - "StringRef":$doc, "StringRef":$libraryCall, + "ValueRange":$outputs, "ArrayRef":$indexingMaps, + "ArrayRef":$iteratorTypes, "StringRef":$doc, + "StringRef":$libraryCall, CArg<"function_ref", "nullptr">)>, OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers, "ArrayRef":$indexingMaps, "ArrayRef":$iteratorTypes, "StringRef":$doc, "StringRef":$libraryCall, CArg<"function_ref", "nullptr">)>, OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs, - "ValueRange":$outputBuffers, "ValueRange":$initTensors, - "ArrayRef":$indexingMaps, "ArrayRef":$iteratorTypes, + "ValueRange":$outputs, "ArrayRef":$indexingMaps, + "ArrayRef":$iteratorTypes, CArg<"function_ref", "nullptr">)>, OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers, "ArrayRef":$indexingMaps, "ArrayRef":$iteratorTypes, @@ -714,8 +684,8 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { ```mlir linalg.indexed_generic #matmul_trait - ins(%A, %B : memref, - memref) + ins(%A, %B : memref, + memref) outs(%C : memref) { (%offset_m: index, %offset_n: index, %offset_k: index, %a: f32, %b: f32, %c: f32) : @@ -761,27 +731,19 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { ```mlir %C = linalg.indexed_generic #trait_attribute - ins(%A, %B : tensor, memref) - init(%C : tensor) + ins(%A, %B : tensor, memref) + outs(%C : tensor) {other-optional-attributes} {region_with_index_arguments} -> (tensor) ``` - - The `init` operand and the conventions around mixing tensors and buffers are - described in more detail in the "Tensors and Buffers: Conventions and - Limitations" section in the [Linalg Document](../docs/Linalg.md) - - Tensor values must be legalized by a buffer allocation pass before most - transformations can be applied. Such legalizations move tensor return values - into output buffer operands and update the region arguments accordingly. 
}]; let builders = [ OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs, - "ValueRange":$outputBuffers, "ValueRange":$initTensors, - "ArrayRef":$indexingMaps, "ArrayRef":$iteratorTypes, - "StringRef":$doc, "StringRef":$libraryCall, + "ValueRange":$outputs, "ArrayRef":$indexingMaps, + "ArrayRef":$iteratorTypes, "StringRef":$doc, + "StringRef":$libraryCall, CArg<"function_ref", "nullptr">)>, OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers, @@ -790,8 +752,8 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { CArg<"function_ref", "nullptr">)>, OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs, - "ValueRange":$outputBuffers, "ValueRange":$initTensors, - "ArrayRef":$indexingMaps, "ArrayRef":$iteratorTypes, + "ValueRange":$outputs, "ArrayRef":$indexingMaps, + "ArrayRef":$iteratorTypes, CArg<"function_ref", "nullptr">)>, OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers, diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 74ca666d63a5e..3fc3fa4a5556e 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -20,6 +20,24 @@ include "mlir/Dialect/Linalg/IR/LinalgBase.td" def LinalgStructuredInterface : OpInterface<"LinalgOp"> { let cppNamespace = "::mlir::linalg"; let methods = [ + //===------------------------------------------------------------------===// + // Loop types handling. + //===------------------------------------------------------------------===// + InterfaceMethod< + /*desc=*/[{ + Return the number of induction variables in the basic block. This should + always be 0 for index-free linalg ops. For IndexedGeneric, this must be + equal to numLoops + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumPayloadInductionVariables", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return isa(this->getOperation()) ? + $_op.getNumLoops() : 0; + }] + >, //===------------------------------------------------------------------===// // Loop types handling. //===------------------------------------------------------------------===// @@ -125,42 +143,60 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { getNumIterators(getReductionIteratorTypeName(), iters) == 1; }]>, //===------------------------------------------------------------------===// - // Num input/output/initTensors arguments handling. + // Num input/output arguments handling. //===------------------------------------------------------------------===// - // These special methods must be defined by each op that wants to implement - // the LinalgStructuredInterface. For now, this is either: - // - Explicitly specified in the op definition. - // - Derived from variadic attributes (for "named" ops, linalg.generic and - // linalg.indexed_generic ops). + // `inputs` must be defined by each op that wants to implement the + // LinalgStructuredInterface. + InterfaceMethod< + /*desc=*/[{ + Return the input shape operands. + }], + /*retTy=*/"ValueRange", + /*methodName=*/"inputs", + /*args=*/(ins) + >, + // These special methods rely on `inputs` and `outputs` being defined by + // each op that wants to implement the LinalgStructuredInterface. InterfaceMethod< /*desc=*/[{ Return the number of inputs. 
}], /*retTy=*/"unsigned", - /*methodName=*/"getNumInputs" + /*methodName=*/"getNumInputs", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return $_op.inputs().size(); + }] >, + // `outputs` must be defined by each op that wants to implement the + // LinalgStructuredInterface. InterfaceMethod< /*desc=*/[{ - Return the number of init tensors. + Return the output shape operands. }], - /*retTy=*/"unsigned", - /*methodName=*/"getNumInitTensors" + /*retTy=*/"ValueRange", + /*methodName=*/"outputs", + /*args=*/(ins) >, InterfaceMethod< /*desc=*/[{ Return the number of outputs. }], /*retTy=*/"unsigned", - /*methodName=*/"getNumOutputs" + /*methodName=*/"getNumOutputs", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return $_op.outputs().size(); + }] >, //===------------------------------------------------------------------===// - // Input arguments handling. + // Input operands handling. //===------------------------------------------------------------------===// InterfaceMethod< /*desc=*/[{ - Return the `i`-th input value. - The `i^th` input argument is always the `i^th` operand regardless of - whether we have tensors or buffers. + Return the `i`-th input operand. }], /*retTy=*/"Value", /*methodName=*/"getInput", @@ -173,24 +209,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { >, InterfaceMethod< /*desc=*/[{ - Return the index of the given input value `v`, or `None` if the value is - not an input. - }], - /*retTy=*/"llvm::Optional", - /*methodName=*/"getIndexOfInput", - /*args=*/(ins "Value":$value), - /*methodBody=*/"", - /*defaultImplementation=*/[{ - auto it = llvm::find(getInputs(), value); - if (it != getInputs().end()) - return it - getInputs().begin(); - return llvm::None; - }] - >, - InterfaceMethod< - /*desc=*/[{ - Return the `i`-th input shaped type, irrespective of buffer or tensor - type. + Return the `i`-th input shaped type }], /*retTy=*/"ShapedType", /*methodName=*/"getInputShapedType", @@ -202,7 +221,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { >, InterfaceMethod< /*desc=*/[{ - Return the input operands. + Return the range of input operands. }], /*retTy=*/"Operation::operand_range", /*methodName=*/"getInputs", @@ -215,7 +234,19 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { >, InterfaceMethod< /*desc=*/[{ - Return the range over the input operands that are of buffer type. + Return the OpOperands for the input operands. + }], + /*retTy=*/" MutableArrayRef", + /*methodName=*/"getInputOpOperands", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return this->getOperation()->getOpOperands().take_front(getNumInputs()); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return the subset of input operands that are of buffer type. }], /*retTy=*/"SmallVector", /*methodName=*/"getInputBuffers", @@ -223,417 +254,504 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { /*methodBody=*/"", /*defaultImplementation=*/[{ return llvm::to_vector<4>(llvm::make_filter_range( - getInputs(), [](Value in){ return in.getType().isa(); })); + getInputs(), [](Value in){ return in.getType().template isa(); })); }] >, InterfaceMethod< /*desc=*/[{ - Return the subset of input operands that are of ranked tensor type. + Return the number of input buffer operands. 
}], - /*retTy=*/"SmallVector", - /*methodName=*/"getInputTensorTypes" , + /*retTy=*/"unsigned", + /*methodName=*/"getNumInputBuffers", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - SmallVector res; - for (Type type : getInputs().getTypes()) - if (auto t = type.template dyn_cast()) - res.push_back(t); - return res; + return $_op.getInputBuffers().size(); }] >, - //===------------------------------------------------------------------===// - // Output arguments handling. - //===------------------------------------------------------------------===// InterfaceMethod< /*desc=*/[{ - Return the output buffer at the given index, asserts that this is a - buffer operand and not a tensor result. - The `i^th` output argument is an operand (resp. a return value) iff it - is a value of buffer type (resp. a return value of tensor type). + Return the `index`^th input buffer. }], /*retTy=*/"Value", - /*methodName=*/"getOutputBuffer", - /*args=*/(ins "unsigned":$i), + /*methodName=*/"getInputBuffer", + /*args=*/(ins "unsigned":$index), /*methodBody=*/"", /*defaultImplementation=*/[{ - // Output buffers are passed as output buffer operands (side-effecting). - // Output tensors are results. - // The union of the 2 are all the outputs and we want to ensure i does - // not overflow the buffer operands. - assert(i + this->getOperation()->getNumResults() < $_op.getNumOutputs() - && "overflowing output buffer index"); - return this->getOperation()->getOperand($_op.getNumInputs() + i); + assert(index < getNumInputBuffers()); + return getInputBuffers()[index]; }] >, InterfaceMethod< /*desc=*/[{ - Return the index of the given buffer value, or `None` if the value is - not part of the output buffers. + Return the subset of input operands that are of buffer type. }], - /*retTy=*/"llvm::Optional", - /*methodName=*/"getIndexOfOutputBuffer", - /*args=*/(ins "Value":$value), + /*retTy=*/"SmallVector", + /*methodName=*/"getInputBuffersOpOperands", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - auto it = llvm::find(getOutputBuffers(), value); - if (it != getOutputBuffers().end()) - return it - getOutputBuffers().begin(); - return llvm::None; + SmallVector res; + res.reserve(getNumInputs()); + for (OpOperand &o : getInputOpOperands()) + if (o.get().getType().isa()) + res.push_back(&o); + return res; }] >, InterfaceMethod< /*desc=*/[{ - Return the type of the output buffer at the given index. + Return the subset of input operands that are of tensor type. }], - /*retTy=*/"MemRefType", - /*methodName=*/"getOutputBufferType", - /*args=*/(ins "unsigned":$i), + /*retTy=*/"SmallVector", + /*methodName=*/"getInputTensors", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return getOutputBuffer(i).getType().template cast(); - }]>, + return llvm::to_vector<4>(llvm::make_filter_range( + getInputs(), + [](Value in){ return in.getType().template isa(); })); + }] + >, InterfaceMethod< /*desc=*/[{ - Return the `i`-th output shaped type, irrespective of buffer or tensor - type. + Return the subset of op operands that are of tensor type. 
}], - /*retTy=*/"ShapedType", - /*methodName=*/"getOutputShapedType", - /*args=*/(ins "unsigned":$i), + /*retTy=*/"SmallVector", + /*methodName=*/"getInputTensorsOpOperands", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return getShapedType(i + $_op.getNumInputs()); - }]>, + SmallVector res; + res.reserve(getNumInputs()); + for (OpOperand &o : getInputOpOperands()) + if (o.get().getType().isa()) + res.push_back(&o); + return res; + }] + >, InterfaceMethod< /*desc=*/[{ - Return the results that are of ranked tensor type. + Return the types of the subset of input operands that are of buffer type. }], - /*retTy=*/"SmallVector", - /*methodName=*/"getOutputTensorTypes", + /*retTy=*/"SmallVector", + /*methodName=*/"getInputBufferTypes" , /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - SmallVector res; - for (Type type : this->getOperation()->getResults().getTypes()) - res.push_back(type.template cast()); - return res; - }]>, + return llvm::to_vector<4>( + llvm::map_range( + llvm::make_filter_range( + ValueRange(getInputs()).getTypes(), + [](Type in){ return in.isa(); }), + [](Type in){ return in.cast(); })); + }] + >, InterfaceMethod< /*desc=*/[{ - Return the output buffers (operands). + Return the types of the subset of input operands that are of ranked + tensor type. }], - /*retTy=*/"Operation::operand_range", - /*methodName=*/"getOutputBuffers", + /*retTy=*/"SmallVector", + /*methodName=*/"getInputTensorTypes" , /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - auto range = this->getOperation()->getOperands(); - return {range.begin() + $_op.getNumInputs(), - range.begin() + getNumInputsAndOutputBuffers()}; + return llvm::to_vector<4>( + llvm::map_range( + llvm::make_filter_range( + ValueRange(getInputs()).getTypes(), + [](Type in){ return in.isa(); }), + [](Type in){ return in.cast(); })); }] >, //===------------------------------------------------------------------===// - // Input and Output arguments handling. + // Output operands handling. //===------------------------------------------------------------------===// InterfaceMethod< /*desc=*/[{ - Return one single buffer at position `$i`. + Return the `i`-th output operand. }], /*retTy=*/"Value", - /*methodName=*/"getBuffer", + /*methodName=*/"getOutput", /*args=*/(ins "unsigned":$i), /*methodBody=*/"", /*defaultImplementation=*/[{ - assert(i < getNumInputsAndOutputBuffers() && "overflowing buffers index"); - return this->getOperation()->getOperand(i); + assert(i < $_op.getNumOutputs()); + return this->getOperation()->getOperand(i + $_op.getNumInputs()); }] >, InterfaceMethod< /*desc=*/[{ - Return the number of output buffers + Return the `i`-th output shaped type }], - /*retTy=*/"unsigned", - /*methodName=*/"getNumOutputBuffers", + /*retTy=*/"ShapedType", + /*methodName=*/"getOutputShapedType", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getOutput(i).getType().template cast(); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return the range of output operands. + }], + /*retTy=*/"Operation::operand_range", + /*methodName=*/"getOutputs", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return $_op.getNumOutputs() - this->getOperation()->getNumResults(); + auto start = + this->getOperation()->getOperands().begin() + $_op.getNumInputs(); + return {start, start + $_op.getNumOutputs()}; }] >, InterfaceMethod< /*desc=*/[{ - Return the number of inputs and outputs, irrespective of their buffer or - tensor type. 
+ Return the OpOperands for the output operands. }], - /*retTy=*/"unsigned", - /*methodName=*/"getNumInputsAndOutputs", + /*retTy=*/" MutableArrayRef", + /*methodName=*/"getOutputOpOperands", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return $_op.getNumInputs() + $_op.getNumOutputs(); + return this->getOperation()->getOpOperands().slice( + getNumInputs(), getNumOutputs()); }] >, InterfaceMethod< /*desc=*/[{ - Return the number of inputs, irrespective of their buffer or tensor type - and output buffers + Return the subset of output operands that are of buffer type. }], - /*retTy=*/"unsigned", - /*methodName=*/"getNumInputsAndOutputBuffers", + /*retTy=*/"SmallVector", + /*methodName=*/"getOutputBuffers", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return $_op.getNumInputs() + $_op.getNumOutputs() - - this->getOperation()->getNumResults(); + return llvm::to_vector<4>(llvm::make_filter_range( + getOutputs(), [](Value in){ return in.getType().template isa(); })); }] >, InterfaceMethod< /*desc=*/[{ - Return the range over inputs (irrespective of type) and output buffers. + Return the `index`^th output buffer. }], - /*retTy=*/"Operation::operand_range", - /*methodName=*/"getInputsAndOutputBuffers", + /*retTy=*/"Value", + /*methodName=*/"getOutputBuffer", + /*args=*/(ins "unsigned":$index), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(index < getNumOutputBuffers()); + return getOutputBuffers()[index]; + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return the subset of output operands that are of buffer type. + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getOutputBuffersOpOperands", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - auto range = this->getOperation()->getOperands(); - return {range.begin(), range.begin() + getNumInputsAndOutputBuffers()}; + SmallVector res; + res.reserve(getNumOutputs()); + for (OpOperand &o : getOutputOpOperands()) + if (o.get().getType().isa()) + res.push_back(&o); + return res; }] >, InterfaceMethod< /*desc=*/[{ - Return the range over init tensors. + Return the number of output buffer operands. }], - /*retTy=*/"Operation::operand_range", - /*methodName=*/"getInitTensors", + /*retTy=*/"unsigned", + /*methodName=*/"getNumOutputBuffers", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - auto range = this->getOperation()->getOperands(); - auto base = range.begin() + getNumInputsAndOutputBuffers(); - return {base, base + $_op.getNumInitTensors()}; + return $_op.getOutputBuffers().size(); }] >, InterfaceMethod< /*desc=*/[{ - Return one single init tensor at position `$i`. + Return the subset of output operands that are of tensor type. }], - /*retTy=*/"Value", - /*methodName=*/"getInitTensor", - /*args=*/(ins "unsigned":$i), + /*retTy=*/"SmallVector", + /*methodName=*/"getOutputTensors", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - assert(i < $_op.getNumInitTensors() && "overflowing init tensor index"); - return getInitTensors()[i]; + return llvm::to_vector<4>(llvm::make_filter_range( + getOutputs(), + [](Value in){ return in.getType().template isa(); })); }] >, InterfaceMethod< /*desc=*/[{ - Return true if the shaped operand index `i` is the index of an init - tensor. + Return the subset of output operands that are of tensor type. 
}], - /*retTy=*/"bool", - /*methodName=*/"isIndexOfAnInitTensor", - /*args=*/(ins "unsigned":$i), + /*retTy=*/"SmallVector", + /*methodName=*/"getOutputTensorsOpOperands", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - assert(i < $_op.getNumShapedOperands() && "overflowing shaped operand index"); - return i >= $_op.getNumInputs() + getNumOutputBuffers(); + SmallVector res; + res.reserve(getNumOutputs()); + for (OpOperand &o : getOutputOpOperands()) + if (o.get().getType().isa()) + res.push_back(&o); + return res; }] >, InterfaceMethod< /*desc=*/[{ - Return the relative init tensor index of the shaped operand index. + Return the number of output tensor operands. }], /*retTy=*/"unsigned", - /*methodName=*/"getInitTensorIndexFromShapedIndex", - /*args=*/(ins "unsigned":$i), + /*methodName=*/"getNumOutputTensors", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - assert(isIndexOfAnInitTensor(i) && "expected an init tensor index"); - return i - $_op.getNumInputs() - getNumOutputBuffers(); + return $_op.getOutputTensors().size(); }] >, InterfaceMethod< /*desc=*/[{ - Return the index of the given init tensor value, or `None` if the value - is not part of the init tensors. + Return the types of the subset of output operands that are of buffer type. }], - /*retTy=*/"llvm::Optional", - /*methodName=*/"getIndexOfInitTensor", - /*args=*/(ins "Value":$value), + /*retTy=*/"SmallVector", + /*methodName=*/"getOutputBufferTypes" , + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - auto it = llvm::find(getInitTensors(), value); - if (it != getInitTensors().end()) - return it - getInitTensors().begin(); - return llvm::None; + return llvm::to_vector<4>( + llvm::map_range( + llvm::make_filter_range( + ValueRange(getOutputs()).getTypes(), + [](Type in){ return in.isa(); }), + [](Type in){ return in.cast(); })); }] >, InterfaceMethod< /*desc=*/[{ - Return the number of inputs, output buffers and init tensors operands. + Return the types of the subset of output operands that are of ranked + tensor type. }], - /*retTy=*/"unsigned", - /*methodName=*/"getNumShapedOperands", + /*retTy=*/"SmallVector", + /*methodName=*/"getOutputTensorTypes" , /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return getNumInputsAndOutputBuffers() + $_op.getNumInitTensors(); + return llvm::to_vector<4>( + llvm::map_range( + llvm::make_filter_range( + ValueRange(getOutputs()).getTypes(), + [](Type in){ return in.isa(); }), + [](Type in){ return in.cast(); })); }] >, + + //===------------------------------------------------------------------===// + // Input and Output arguments handling. + //===------------------------------------------------------------------===// InterfaceMethod< /*desc=*/[{ - Return the `i`-th shaped operand value, which can be an arbitrary input - tensor/buffer, init tensor or output buffer. + Return true if the payload uses the value loaded from `opOperand`. This + is useful to avoid loading from "write-only" memory that may be + uninitialized, as well as properly cloning "read-write" operands. 
}], - /*retTy=*/"Value", - /*methodName=*/"getShapedOperand", - /*args=*/(ins "unsigned":$i), + /*retTy=*/"bool", + /*methodName=*/"payloadUsesValueFromOpOperand", + /*args=*/(ins "OpOperand *":$opOperand), /*methodBody=*/"", /*defaultImplementation=*/[{ - assert(i < $_op.getNumShapedOperands()); - return this->getOperation()->getOperand(i); + unsigned bbArgNumber = + getNumPayloadInductionVariables() + opOperand->getOperandNumber(); + // Safeguard against the named linalg ops that are manually defined and + // that only support buffer semantics: we should not be there. + // Such ops have an empty regionBuilder and are not constructed with a + // region for now. In the future they are slated to disappear. + assert(this->getOperation()->getNumRegions() == 1 && "unexpected " + "missing region (calling `payloadUsesValueFromOpOperand` on " + "manually defined named Linalg op?)"); + Block &block = this->getOperation()->getRegion(0).front(); + // Init tensors have uses. + return !block.getArgument(bbArgNumber).use_empty(); }] >, InterfaceMethod< /*desc=*/[{ - Return the range over inputs, output buffers and init tensors. + Return true if the payload uses the value loaded from input operand + `index`. }], - /*retTy=*/"Operation::operand_range", - /*methodName=*/"getShapedOperands", - /*args=*/(ins), + /*retTy=*/"bool", + /*methodName=*/"payloadUsesValueFromInputOperandIndex", + /*args=*/(ins "unsigned":$index), /*methodBody=*/"", /*defaultImplementation=*/[{ - auto range = this->getOperation()->getOperands(); - return {range.begin(), range.begin() + getNumShapedOperands()}; + return payloadUsesValueFromOpOperand(&getInputOpOperands()[index]); }] >, InterfaceMethod< /*desc=*/[{ - Return the `i`-th shaped type, there are 3 cases: - 1. if `i < $_op.getNumInputs()` then return `getInputShapedType(i)`; - otherwise - 2. if `i < getNumInputsAndOutputBuffers()` then return the - `getOutputBufferType(i - $_op.getNumInputs())`; otherwise - 3. return the `i - getNumInputsAndOutputBuffers()` result type. + Return true if the payload uses the value loaded from output operand + `index`. }], - /*retTy=*/"ShapedType", - /*methodName=*/"getShapedType", - /*args=*/(ins "unsigned":$i), + /*retTy=*/"bool", + /*methodName=*/"payloadUsesValueFromOutputOperandIndex", + /*args=*/(ins "unsigned":$index), /*methodBody=*/"", /*defaultImplementation=*/[{ - if (i < $_op.getNumInputs()) - return getInputShapedType(i); - if (i < getNumInputsAndOutputBuffers()) - return getOutputBufferType(i - $_op.getNumInputs()); - return this->getOperation()->getResult( - i - getNumInputsAndOutputBuffers()). - getType().template cast(); - }]>, + return payloadUsesValueFromOpOperand(&getOutputOpOperands()[index]); + }] + >, InterfaceMethod< /*desc=*/[{ - Return the shaped types for all the inputs and outputs + Return true if `opOperand` is an init tensor. This is true when it is + an output tensor operand whose value is used in the payload region. }], - /*retTy=*/"SmallVector", - /*methodName=*/"getInputOutputShapedTypes", + /*retTy=*/"bool", + /*methodName=*/"isInitTensor", + /*args=*/(ins "OpOperand *":$opOperand), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + if (!opOperand->get().getType().template isa()) + return false; + if (opOperand->getOperandNumber() < $_op.getNumInputs()) + return false; + return payloadUsesValueFromOpOperand(opOperand); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return true if the operand at output index `index` is an init tensor. 
+ }], + /*retTy=*/"bool", + /*methodName=*/"isIndexOfInitTensor", + /*args=*/(ins "unsigned":$index), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(index < getNumOutputs()); + return isInitTensor( + &this->getOperation()->getOpOperands()[$_op.getNumInputs() + index]); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return the output operands that are init tensors. + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getInitTensors", /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - SmallVector inputOutputTypes( - this->getOperation()->operand_type_begin(), - this->getOperation()->operand_type_end()); - inputOutputTypes.append(this->getOperation()->result_type_begin(), - this->getOperation()->result_type_end()); + auto start = + this->getOperation()->getOpOperands().begin() + $_op.getNumInputs(); return llvm::to_vector<4>( - llvm::map_range(inputOutputTypes, [](Type type) -> ShapedType { - return type.cast(); - })); + llvm::map_range( + llvm::make_filter_range( + llvm::make_range(start, start + $_op.getNumOutputs()), + [&](OpOperand &opOperand) { + return $_op.isInitTensor(&opOperand); + }), + [&](OpOperand &opOperand) { + return opOperand.get(); + })); }] >, InterfaceMethod< /*desc=*/[{ - Return the first position of the shaped operand in the operand list. + Return the number of init tensor operands. }], - /*retTy=*/"Optional", - /*methodName=*/"getIndexOfShapedOperand", - /*args=*/(ins "Value":$value), + /*retTy=*/"unsigned", + /*methodName=*/"getNumInitTensors", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return getInitTensors().size(); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return the number of input and output operands. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumShapedOperands", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - Optional inputIndex = getIndexOfInput(value); - if (inputIndex.hasValue()) return inputIndex.getValue(); - Optional outputIndex = getIndexOfOutputBuffer(value); - if (outputIndex.hasValue()) - return $_op.getNumInputs() + outputIndex.getValue(); - Optional initTensorIndex = getIndexOfInitTensor(value); - if (initTensorIndex.hasValue()) - return $_op.getNumInputs() + $_op.getNumOutputBuffers() + initTensorIndex.getValue(); - return llvm::None; + return $_op.getNumInputs() + $_op.getNumOutputs(); }] >, InterfaceMethod< /*desc=*/[{ - Returns the operand index given the input index. Returns None - of the input index is invalid. + Return the `i`-th shaped operand value. }], - /*retTy=*/"Optional", - /*methodName=*/"getOperandIndexForInputIndex", - /*args=*/(ins "unsigned":$input_index), + /*retTy=*/"Value", + /*methodName=*/"getShapedOperand", + /*args=*/(ins "unsigned":$i), /*methodBody=*/"", /*defaultImplementation=*/[{ - if (input_index >= $_op.getNumInputs()) - return llvm::None; - return input_index; + assert(i < $_op.getNumShapedOperands()); + return this->getOperation()->getOperand(i); }] >, InterfaceMethod< /*desc=*/[{ - Returns the operand index given the output index. Returns None - of the output index is invalid. + Return the range over input and output operands. 
}], - /*retTy=*/"Optional", - /*methodName=*/"getOperandIndexForOutputIndex", - /*args=*/(ins "unsigned":$output_index), + /*retTy=*/"Operation::operand_range", + /*methodName=*/"getShapedOperands", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - if (output_index >= $_op.getNumOutputs()) - return llvm::None; - return output_index + $_op.getNumInputs(); + auto range = this->getOperation()->getOperands(); + return {range.begin(), range.begin() + getNumShapedOperands()}; }] >, InterfaceMethod< /*desc=*/[{ - Returns the input index given the operand index. Return None - if the operand index doesnt corresponding to an input. + Return the OpOperands for all the shaped operands. }], - /*retTy=*/"Optional", - /*methodName=*/"getInputIndex", - /*args=*/(ins "unsigned":$operand_index), + /*retTy=*/" MutableArrayRef", + /*methodName=*/"getShapedOpOperands", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - if (operand_index >= $_op.getNumInputs()) - return llvm::None; - return operand_index; + return this->getOperation()->getOpOperands().take_front( + getNumShapedOperands()); }] >, InterfaceMethod< /*desc=*/[{ - Returns the output index given the operand index. Return None - if the operand index doesnt corresponding to an output. + Return the range over input and output operands. }], - /*retTy=*/"Optional", - /*methodName=*/"getOutputIndex", - /*args=*/(ins "unsigned":$operand_index), + /*retTy=*/"SmallVector", + /*methodName=*/"getShapedOperandTypes", + /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - if (operand_index < $_op.getNumInputs() || - operand_index >= $_op.getNumInputs() + $_op.getNumOutputs()) - return llvm::None; - return operand_index - $_op.getNumInputs(); + return llvm::to_vector<4>( + llvm::map_range( + getShapedOperands(), + [](Value v) { return v.getType().cast(); })); }] >, + InterfaceMethod< + /*desc=*/[{ + Return the `i`-th shaped type + }], + /*retTy=*/"ShapedType", + /*methodName=*/"getShapedType", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return $_op.getShapedOperand(i).getType().template cast(); + }]>, //===------------------------------------------------------------------===// // Other interface methods. 
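The output-operand accessors added in the hunks above replace the old init-tensor index arithmetic with queries over OpOperands. A minimal sketch of how client code might consume them, using only interface methods introduced in this patch; the include path and the helper name are illustrative, not part of the change:

  // Sketch only; the helper name is hypothetical.
  #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
  #include "llvm/Support/Debug.h"

  using namespace mlir;
  using namespace mlir::linalg;

  static void inspectOutputs(LinalgOp linalgOp) {
    // Buffer and tensor outputs are now separate, filtered views.
    for (Value buffer : linalgOp.getOutputBuffers())
      llvm::dbgs() << "output buffer: " << buffer << "\n";
    // An output tensor counts as an "init" tensor only when the payload
    // actually reads the corresponding block argument.
    for (OpOperand &opOperand : linalgOp.getOutputOpOperands())
      if (linalgOp.isInitTensor(&opOperand))
        llvm::dbgs() << "init tensor operand #"
                     << opOperand.getOperandNumber() << "\n";
  }
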
@@ -679,7 +797,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       /*args=*/(ins "unsigned":$i),
       /*methodBody=*/"",
       /*defaultImplementation=*/[{
-        assert(i < getNumInputsAndOutputs());
+        assert(i < $_op.getNumShapedOperands());
         return getIndexingMaps()[i];
       }]
     >,
@@ -719,8 +837,8 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       /*methodBody=*/"",
       /*defaultImplementation=*/[{
         return this->getOperation()->getNumResults() == 0 &&
-               llvm::all_of(getInputs(),
-                            [](Value v) { return v.getType().isa<MemRefType>(); });
+               llvm::all_of(getShapedOperands(), [](Value v) {
+                 return v.getType().template isa<MemRefType>(); });
       }]
     >,
     InterfaceMethod<
@@ -732,11 +850,9 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       /*args=*/(ins),
       /*methodBody=*/"",
       /*defaultImplementation=*/[{
-        auto isTensorType = [](Value v) {
-          return v.getType().isa<RankedTensorType>();
-        };
-        return llvm::all_of(getInputs(), isTensorType) &&
-               llvm::all_of(this->getOperation()->getResults(), isTensorType);
+        return llvm::all_of(getShapedOperands(), [](Value v) {
+          return v.getType().template isa<RankedTensorType>();
+        });
       }]
     >,
     InterfaceMethod<
@@ -748,7 +864,8 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       /*args=*/(ins),
       /*methodBody=*/"",
       /*defaultImplementation=*/[{
-        return $_op->getAttr(getSparseAttrName()).template dyn_cast_or_null<ArrayAttr>() != nullptr;
+        return $_op->getAttr(getSparseAttrName()).
+          template dyn_cast_or_null<ArrayAttr>() != nullptr;
       }]
     >,
     InterfaceMethod<
@@ -871,7 +988,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
   ];

   let extraClassDeclaration = [{
-    /// Return the flat list of all operand dimension sizes in the order they 
+    /// Return the flat list of all operand dimension sizes in the order they
     /// appear in the operands.
     SmallVector<Value, 4> createFlatListOfOperandDims(OpBuilder &, Location);

@@ -893,7 +1010,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       for (unsigned i = 0; i < nExtraOperands; ++i) {
         res.push_back(getOperation()->getOperand(numShapedOperands + i));
         assert((res.back().getType().isSignlessIntOrIndexOrFloat()
-                || res.back().getType().isa<VectorType>()) &&
+                || res.back().getType().template isa<VectorType>()) &&
                "expected scalar or vector type");
       }
       return res;
@@ -904,7 +1021,6 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
     //========================================================================//
     void setNumInputs(unsigned num) { setOperandSegmentAt(0, num); }
     void setNumOutputBuffers(unsigned num) { setOperandSegmentAt(1, num); }
-    void setNumInitTensors(unsigned num) { setOperandSegmentAt(2, num); }

     private:
     void setOperandSegmentAt(unsigned idx, unsigned val) {
@@ -916,6 +1032,8 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
       getOperation()->setAttr("operand_segment_sizes", newAttr);
     }
   }];
+
+  let verify = [{ return detail::verifyStructuredOpInterface($_op); }];
 }

 #endif // LINALG_IR_STRUCTURED_OPS_INTERFACE
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h
deleted file mode 100644
index adfa6a6f1af93..0000000000000
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h
+++ /dev/null
@@ -1,166 +0,0 @@
-//===- LinalgTraits.h - Linalg Traits ---------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_DIALECT_LINALG_LINALGTRAITS_H_ -#define MLIR_DIALECT_LINALG_LINALGTRAITS_H_ - -#include "mlir/Dialect/Linalg/IR/LinalgTypes.h" -#include "mlir/Dialect/Utils/StructuredOpsUtils.h" -#include "mlir/IR/AffineMap.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/BuiltinTypes.h" -#include "mlir/IR/OpDefinition.h" -#include "mlir/Support/LLVM.h" - -namespace mlir { -namespace OpTrait { -namespace linalg { - -/// This class provides the API for ops that are known to have a specified -/// number of inputs, all passed as operands. Use as a trait as follows: -/// -/// class DotOp : public Op::Impl> { -/// -template class NInputs { -public: - template - class Impl : public OpTrait::TraitBase::Impl> { - public: - static unsigned getNumInputs() { return N; } - }; -}; - -/// This class provides the API for ops that are known to not have init tensor -/// operands. Use as a trait as follows: -/// -/// class CopyOp : public Op { -/// -template -class ZeroInitTensors : public TraitBase { -public: - static unsigned getNumInitTensors() { return 0; } -}; - -/// This class provides the API for ops that are known to have a specified -/// number of outputs, all passed as operands. Use as a trait as follows: -/// -/// class DotOp : public Op::Impl> { -/// -template class NOutputs { -public: - template - class Impl : public OpTrait::TraitBase::Impl> { - public: - static unsigned getNumOutputs() { return N; } - }; -}; - -/// This class provides a verifier for structured ops that are known to operate -/// on buffers or tensors. This trait must be used in conjunction with an op -/// definition or a trait that provides the methods `getNumInputs` and -/// `getNumOutputs`. Use as a trait as follows: -/// -/// class DotOp : public Op { -/// -template -class StructuredOpTraits - : public OpTrait::TraitBase { -public: - static LogicalResult verifyTrait(Operation *op) { - ConcreteType concreteOp = cast(op); - auto nOperands = concreteOp.getNumInputsAndOutputBuffers(); - if (failed(OpTrait::impl::verifyAtLeastNOperands(op, nOperands))) - return failure(); - if (op->getNumResults() > concreteOp.getNumOutputs()) - return op->emitError("unexpected #results > #outputs"); - return success(); - } -}; - -/// This class provides a verifier for structured ops that are known to operate -/// on buffers or tensors and that support `ins`, `outs` and `init` arguments. -/// This trait must be used in conjunction with an op definition or a trait that -/// provides the methods `getNumInputs` and `getNumOutputs`. 
-/// -/// Use as a trait as follows: -/// -/// class MatmulOp : public Op { -/// -template -class NamedStructuredOpTrait - : public OpTrait::TraitBase { -public: - unsigned getNumInputs() { - return cast(this->getOperation()).inputs().size(); - } - unsigned getNumInitTensors() { - return cast(this->getOperation()).init_tensors().size(); - } - unsigned getNumOutputs() { - ConcreteType concreteOp = cast(this->getOperation()); - return concreteOp.output_buffers().size() + - concreteOp.result_tensors().size(); - } - static LogicalResult verifyTrait(Operation *op) { - ConcreteType concreteOp = cast(op); - unsigned nInputAndBufferOperands = - concreteOp.getNumInputsAndOutputBuffers(); - if (failed( - OpTrait::impl::verifyAtLeastNOperands(op, nInputAndBufferOperands))) - return failure(); - - SmallVector redDims; - concreteOp.getReductionDims(redDims); - // If no result and no reduction, only check there is no init tensor and we - // are done. - if (redDims.empty() || op->getNumResults() == 0) { - if (!concreteOp.init_tensors().empty()) - return op->emitError("expected empty `init` when op has no " - "results or no reduction dims"); - return success(); - } - - // Only a single tensor result supported atm. - if (op->getNumResults() != 1) - return op->emitError( - "expected single tensor result when reduction present"); - - if (concreteOp.init_tensors().size() != op->getNumResults()) - return op->emitError( - "expected #init tensors to match #results when reduction present"); - - for (unsigned idx = 0, e = op->getNumResults(); idx < e; ++idx) - if (concreteOp.init_tensors()[idx].getType() != op->getResultTypes()[idx]) - return op->emitError("expected init tensor #") - << idx << " of the same type as result #" << idx; - - // Output tensor indexing map may not depend on reduction index. - // TODO: this is not yet tested. Add a test when linalg.generic switches to - // this representation. - for (unsigned idx = 0, e = concreteOp.getNumOutputs(); idx < e; ++idx) { - AffineMap outputMap = concreteOp.getOutputIndexingMap(idx); - for (auto expr : outputMap.getResults()) { - for (auto dim : redDims) { - unsigned pos = dim.cast().getPosition(); - if (expr.isFunctionOfDim(pos)) - return op->emitError( - "unexpected single tensor output indexing map ") - << "is function of reduction dim @" << pos; - } - } - } - - return success(); - } -}; - -} // namespace linalg -} // namespace OpTrait -} // namespace mlir - -#endif // MLIR_DIALECT_LINALG_LINALGTRAITS_H_ diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 552ac75bfee5d..0f060b2b1a0a5 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -673,6 +673,11 @@ class AnyStridedMemRefOfRank : MemRefRankOf<[AnyType], [rank]>.predicate]>, AnyStridedMemRef.description # " of rank " # rank>; +class StridedMemRefRankOf allowedTypes, list ranks> : + Type.predicate, HasAnyRankOfPred]>, + StrJoin.result # " " # + MemRefOf.description>; + // This represents a generic tuple without any constraints on element type. 
def AnyTuple : Type; diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir index 38d97332f0d70..9e4b9f39f7fbf 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -22,7 +22,7 @@ func @main() { %C = constant dense<1000.0> : tensor<2x4xf32> %D = linalg.matmul ins(%A, %B: tensor<2x3xf32>, tensor<3x4xf32>) - init(%C: tensor<2x4xf32>) -> tensor<2x4xf32> + outs(%C: tensor<2x4xf32>) -> tensor<2x4xf32> %unranked = tensor.cast %D : tensor<2x4xf32> to tensor<*xf32> call @print_memref_f32(%unranked) : (tensor<*xf32>) -> () diff --git a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp index ca2d16e8de863..1042930b1ef7c 100644 --- a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp +++ b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/BuiltinOps.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -113,15 +114,16 @@ LinalgDependenceGraph::LinalgDependenceGraph(Aliases &aliases, } void LinalgDependenceGraph::addDependenceElem(DependenceType dt, - LinalgOpView indexingOpView, - LinalgOpView dependentOpView) { + OpOperand *indexingOpView, + OpOperand *dependentOpView) { LLVM_DEBUG(dbgs() << "\nAdd dep type " << getDependenceTypeStr(dt) << ":\t (" - << *indexingOpView.op << ", " << indexingOpView.operandIndex - << ") -> \n\t\t(" << *dependentOpView.op << ", " - << dependentOpView.operandIndex << ")"); - dependencesFromGraphs[dt][indexingOpView.op].push_back( + << indexingOpView->get() << " @" + << indexingOpView->getOperandNumber() << ") -> \n\t\t(" + << dependentOpView->get() << " @" + << dependentOpView->getOperandNumber() << ")"); + dependencesFromGraphs[dt][indexingOpView->getOwner()].push_back( LinalgDependenceGraphElem{dependentOpView, indexingOpView, dt}); - dependencesIntoGraphs[dt][dependentOpView.op].push_back( + dependencesIntoGraphs[dt][dependentOpView->getOwner()].push_back( LinalgDependenceGraphElem{indexingOpView, dependentOpView, dt}); } @@ -156,57 +158,25 @@ LinalgDependenceGraph::getDependencesInto( } void LinalgDependenceGraph::addDependencesBetween(LinalgOp src, LinalgOp dst) { - for (auto srcView : llvm::enumerate(src.getOutputBuffers())) { // W - unsigned srcIndex = - src.getOperandIndexForOutputIndex(srcView.index()).getValue(); + for (OpOperand *srcOpOperand : src.getOutputBuffersOpOperands()) { // W // RAW graph - for (auto dstView : llvm::enumerate(dst.getInputBuffers())) { // R - if (aliases.alias(srcView.value(), - dstView.value())) { // if alias, fill RAW - unsigned dstIndex = - dst.getOperandIndexForInputIndex(dstView.index()).getValue(); - addDependenceElem(DependenceType::RAW, - LinalgOpView{src.getOperation(), srcIndex}, - LinalgOpView{dst.getOperation(), dstIndex}); - } - } + for (OpOperand *dstOpOperand : dst.getInputBuffersOpOperands()) // R + if (aliases.alias(srcOpOperand->get(), dstOpOperand->get())) // RAW alias + addDependenceElem(DependenceType::RAW, srcOpOperand, dstOpOperand); // WAW graph - for (auto dstView : llvm::enumerate(dst.getOutputBuffers())) { // W - if (aliases.alias(srcView.value(), - dstView.value())) { // if alias, fill WAW - unsigned dstIndex = - 
dst.getOperandIndexForOutputIndex(dstView.index()).getValue(); - addDependenceElem(DependenceType::WAW, - LinalgOpView{src.getOperation(), srcIndex}, - LinalgOpView{dst.getOperation(), dstIndex}); - } - } + for (OpOperand *dstOpOperand : dst.getOutputBuffersOpOperands()) // W + if (aliases.alias(srcOpOperand->get(), dstOpOperand->get())) // WAW alias + addDependenceElem(DependenceType::WAW, srcOpOperand, dstOpOperand); } - for (auto srcView : llvm::enumerate(src.getInputBuffers())) { // R - unsigned srcIndex = - src.getOperandIndexForInputIndex(srcView.index()).getValue(); + for (OpOperand *srcOpOperand : src.getInputBuffersOpOperands()) { // R // RAR graph - for (auto dstView : llvm::enumerate(dst.getInputBuffers())) { // R - if (aliases.alias(srcView.value(), - dstView.value())) { // if alias, fill RAR - unsigned dstIndex = - dst.getOperandIndexForInputIndex(dstView.index()).getValue(); - addDependenceElem(DependenceType::RAR, - LinalgOpView{src.getOperation(), srcIndex}, - LinalgOpView{dst.getOperation(), dstIndex}); - } - } + for (OpOperand *dstOpOperand : dst.getInputBuffersOpOperands()) // R + if (aliases.alias(srcOpOperand->get(), dstOpOperand->get())) // RAR alias + addDependenceElem(DependenceType::RAR, srcOpOperand, dstOpOperand); // WAR graph - for (auto dstView : llvm::enumerate(dst.getOutputBuffers())) { // W - if (aliases.alias(srcView.value(), - dstView.value())) { // if alias, fill WAR - unsigned dstIndex = - dst.getOperandIndexForOutputIndex(dstView.index()).getValue(); - addDependenceElem(DependenceType::WAR, - LinalgOpView{src.getOperation(), srcIndex}, - LinalgOpView{dst.getOperation(), dstIndex}); - } - } + for (OpOperand *dstOpOperand : dst.getOutputBuffersOpOperands()) // W + if (aliases.alias(srcOpOperand->get(), dstOpOperand->get())) // WAR alias + addDependenceElem(DependenceType::WAR, srcOpOperand, dstOpOperand); } } @@ -248,17 +218,15 @@ LinalgDependenceGraph::findOperationsWithCoveringDependences( // TODO: we are not considering paths yet, just interleaved positions. for (auto dt : types) { for (auto dependence : getDependencesFrom(src, dt)) { - auto interimPos = linalgOpPositions.lookup(dependence.dependentOpView.op); + auto interimPos = + linalgOpPositions.lookup(dependence.dependentOpView->getOwner()); // Skip if not interleaved. 
if (interimPos >= dstPos || interimPos <= srcPos) continue; - linalg::LinalgOp consumer = - cast(dependence.indexingOpView.op); - Value consumerView = - consumer.getShapedOperand(dependence.indexingOpView.operandIndex); + Value consumerView = dependence.indexingOpView->get(); if (view && !aliases.alias(view, consumerView)) continue; - auto *op = dependence.dependentOpView.op; + auto *op = dependence.dependentOpView->getOwner(); LLVM_DEBUG(dbgs() << "\n***Found covering dependence of type " << getDependenceTypeStr(dt) << ": " << *src << " -> " << *op << " on " << consumerView); @@ -271,12 +239,10 @@ LinalgDependenceGraph::findOperationsWithCoveringDependences( bool LinalgDependenceGraph::hasDependenceFrom( LinalgOp srcLinalgOp, LinalgOp dstLinalgOp, ArrayRef depTypes) const { - for (auto dep : depTypes) { - for (auto dependence : getDependencesInto(dstLinalgOp, dep)) { - if (dependence.dependentOpView.op == srcLinalgOp) + for (auto dep : depTypes) + for (auto dependence : getDependencesInto(dstLinalgOp, dep)) + if (dependence.dependentOpView->getOwner() == srcLinalgOp) return true; - } - } return false; } diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp index 0ae1efe10b7fd..3c3b2777d6c14 100644 --- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp @@ -23,36 +23,25 @@ using namespace mlir::scf; Operation *mlir::edsc::makeGenericLinalgOp( ArrayRef iteratorTypes, ArrayRef inputs, - ArrayRef outputBuffers, ArrayRef initTensors, - ArrayRef resultTensorTypes, + ArrayRef outputs, TypeRange resultTensorTypes, function_ref regionBuilder, ArrayRef otherValues, ArrayRef otherAttributes) { OpBuilder &builder = edsc::ScopedContext::getBuilderRef(); // Build maps SmallVector, 4> exprsList; - exprsList.reserve(inputs.size() + outputBuffers.size() + initTensors.size()); - for (auto container : {inputs, outputBuffers, resultTensorTypes}) + exprsList.reserve(inputs.size() + outputs.size()); + + for (auto container : {inputs, outputs}) for (const StructuredIndexed &s : container) exprsList.emplace_back(s.getExprs().begin(), s.getExprs().end()); auto maps = AffineMap::inferFromExprList(exprsList); - SmallVector types; - assert(llvm::all_of(resultTensorTypes, [](const StructuredIndexed &s) { - return !s.hasValue(); - })); - std::copy(resultTensorTypes.begin(), resultTensorTypes.end(), - std::back_inserter(types)); - - SmallVector inputValues, outputBufferValues, initTensorValues; + SmallVector inputValues, outputValues; inputValues.reserve(inputs.size()); - outputBufferValues.reserve(outputBuffers.size()); - initTensorValues.reserve(initTensors.size()); + outputValues.reserve(outputs.size()); std::copy(inputs.begin(), inputs.end(), std::back_inserter(inputValues)); - std::copy(outputBuffers.begin(), outputBuffers.end(), - std::back_inserter(outputBufferValues)); - std::copy(initTensors.begin(), initTensors.end(), - std::back_inserter(initTensorValues)); + std::copy(outputs.begin(), outputs.end(), std::back_inserter(outputValues)); auto iteratorStrTypes = llvm::to_vector<8>(llvm::map_range(iteratorTypes, toString)); @@ -61,10 +50,9 @@ Operation *mlir::edsc::makeGenericLinalgOp( edsc::ScopedContext::getBuilderRef() .create( edsc::ScopedContext::getLocation(), - types, + resultTensorTypes, inputValues, - outputBufferValues, - initTensorValues, + outputValues, builder.getAffineMapArrayAttr(maps), builder.getStrArrayAttr(iteratorStrTypes), StringAttr() /*doc*/, @@ -77,12 +65,10 @@ Operation *mlir::edsc::makeGenericLinalgOp( 
using namespace edsc; SmallVector blockTypes; - blockTypes.reserve(inputs.size() + outputBuffers.size() + initTensors.size()); - for (auto container : {inputs, outputBuffers}) + blockTypes.reserve(inputs.size() + outputs.size()); + for (auto container : {inputs, outputs}) for (const StructuredIndexed &s : container) blockTypes.push_back(getElementTypeOrSelf(s.getType())); - for (Value v : initTensors) - blockTypes.push_back(getElementTypeOrSelf(v.getType())); assert(op->getNumRegions() == 1); assert(op->getRegion(0).empty()); @@ -119,11 +105,10 @@ Operation *mlir::edsc::ops::linalg_generic_pointwise( linalg_yield(unaryOp(a)); }; if (O.getType().isa()) - return makeGenericLinalgOp(iterTypes, /*inputs=*/{I}, /*outputBuffers=*/{}, - /*initTensors=*/{}, /*resultTensorTypes=*/{O}, - fun); - return makeGenericLinalgOp(iterTypes, /*inputs=*/{I}, /*outputBuffers=*/{O}, - /*initTensors=*/{}, /*resultTensorTypes=*/{}, fun); + return makeGenericLinalgOp(iterTypes, /*inputs=*/{I}, /*outputs=*/{O}, + /*resultTensorTypes=*/{O}, fun); + return makeGenericLinalgOp(iterTypes, /*inputs=*/{I}, /*outputs=*/{O}, + /*resultTensorTypes=*/{}, fun); } Operation *mlir::edsc::ops::linalg_generic_pointwise_tanh(StructuredIndexed I, @@ -144,12 +129,10 @@ Operation *mlir::edsc::ops::linalg_generic_pointwise( linalg_yield(binaryOp(a, b)); }; if (O.getType().isa()) - return makeGenericLinalgOp( - iterTypes, /*inputs=*/{I1, I2}, /*outputBuffers=*/{}, - /*initTensors=*/{}, /*resultTensorTypes=*/{O}, fun); + return makeGenericLinalgOp(iterTypes, /*inputs=*/{I1, I2}, /*outputs=*/{O}, + /*resultTensorTypes=*/{O}, fun); return makeGenericLinalgOp(iterTypes, /*inputs=*/{I1, I2}, - /*outputBuffers=*/{O}, - /*initTensors=*/{}, /*resultTensorTypes=*/{}, fun); + /*outputs=*/{O}, /*resultTensorTypes=*/{}, fun); } Operation *mlir::edsc::ops::linalg_generic_pointwise_add(StructuredIndexed I1, @@ -181,8 +164,7 @@ mlir::edsc::ops::linalg_generic_matmul(Value vA, Value vB, Value vC, return makeGenericLinalgOp( {IteratorType::Parallel, IteratorType::Parallel, IteratorType::Reduction}, /*inputs=*/{A({m, k}), B({k, n})}, - /*outputBuffers=*/{C({m, n})}, - /*initTensors=*/{}, + /*outputs=*/{C({m, n})}, /*resultTensorTypes=*/{}, regionBuilder); // clang-format on @@ -199,8 +181,7 @@ mlir::edsc::ops::linalg_generic_matmul(Value vA, Value vB, Value vC, return makeGenericLinalgOp( {IteratorType::Parallel, IteratorType::Parallel, IteratorType::Reduction}, /*inputs=*/{A({m, k}), B({k, n})}, - /*outputBuffers=*/{}, - /*initTensors=*/{C({m, n})}, + /*outputs=*/{C({m, n})}, /*resultTensorTypes=*/{D({m, n})}, regionBuilder); // clang-format on @@ -236,8 +217,7 @@ Operation *mlir::edsc::ops::linalg_generic_conv_nhwc(Value vI, Value vW, simplifyAffineExpr(s[1] * w + d[1] * kw, numDims, 0), c}), W({kh, kw, c, f}) }, - /*outputBuffers=*/{ O({b, h, w, f}) }, - /*initTensors=*/{}, + /*outputs=*/{ O({b, h, w, f}) }, /*resultTensorTypes=*/{}, macRegionBuilder); // clang-format on @@ -272,9 +252,8 @@ Operation *mlir::edsc::ops::linalg_generic_dilated_conv_nhwc( simplifyAffineExpr(s[1] * w + d[1] * kw, numDims, 0), c}), W({kh, kw, c, dm})}, - /*outputBuffers=*/{ + /*outputs=*/{ O({b, h, w, simplifyAffineExpr(c * depth_multiplier + dm, numDims, 0)})}, - /*initTensors=*/{}, /*resultTensorTypes=*/{}, macRegionBuilder); // clang-format on diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 3a7249df8e793..bcbd6d9036121 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ 
b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -88,22 +88,20 @@ SmallVector LinalgOp::createLoopRanges(OpBuilder &b, Location loc) { /// Forward declarations. template -static void buildNamedStructuredOpRegionAndAttributes( - OpBuilder &opBuilder, OperationState &result, TypeRange inputTypes, - TypeRange outputBufferTypes, TypeRange initTensorTypes, - TypeRange resultTypes); +static void buildNamedStructuredOpRegionAndAttributes(OpBuilder &opBuilder, + OperationState &result, + TypeRange inputTypes, + TypeRange outputTypes); static ParseResult parseCommonStructuredOpParts(OpAsmParser &parser, OperationState &result, SmallVectorImpl &inputTypes, - SmallVectorImpl &outputBufferTypes, - SmallVectorImpl &initTensorTypes); + SmallVectorImpl &outputTypes); template static ParseResult parseNamedStructuredOpRegion(OpAsmParser &parser, Region ®ion, - TypeRange inputTypes, TypeRange outputBufferTypes, - TypeRange initTensorTypes, TypeRange resultTypes); + TypeRange inputTypes, TypeRange outputTypes); static ParseResult parseNamedStructuredOpResults(OpAsmParser &parser, SmallVectorImpl &resultTypes); @@ -122,9 +120,6 @@ static void printNamedStructuredOpResults(OpAsmPrinter &p, template static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op); -template -static LogicalResult verifyNamedStructuredOp(NamedStructuredOpType op); - /// This is a common class used for patterns of the form /// ``` /// someop(memrefcast) -> someop @@ -152,11 +147,10 @@ static LogicalResult foldMemRefCast(Operation *op) { //===----------------------------------------------------------------------===// void GenericOp::build( OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes, - ValueRange inputs, ValueRange outputBuffers, ValueRange initTensors, - ArrayRef indexingMaps, ArrayRef iteratorTypes, - StringRef doc, StringRef libraryCall, + ValueRange inputs, ValueRange outputs, ArrayRef indexingMaps, + ArrayRef iteratorTypes, StringRef doc, StringRef libraryCall, function_ref bodyBuild) { - build(builder, result, resultTensorTypes, inputs, outputBuffers, initTensors, + build(builder, result, resultTensorTypes, inputs, outputs, builder.getAffineMapArrayAttr(indexingMaps), builder.getStrArrayAttr(iteratorTypes), doc.empty() ? 
StringAttr() : builder.getStringAttr(doc), @@ -166,7 +160,7 @@ void GenericOp::build( return; SmallVector blockArgTypes; - for (ValueRange container : {inputs, outputBuffers, initTensors}) + for (ValueRange container : {inputs, outputs}) for (Value v : container) blockArgTypes.push_back(v.getType().cast().getElementType()); @@ -178,41 +172,40 @@ void GenericOp::build( void GenericOp::build( OpBuilder &builder, OperationState &result, ValueRange inputs, - ValueRange outputBuffers, ArrayRef indexingMaps, + ValueRange outputs, ArrayRef indexingMaps, ArrayRef iteratorTypes, StringRef doc, StringRef libraryCall, function_ref bodyBuild) { - build(builder, result, TypeRange{}, inputs, outputBuffers, ValueRange{}, - indexingMaps, iteratorTypes, doc, libraryCall, bodyBuild); + build(builder, result, TypeRange{}, inputs, outputs, indexingMaps, + iteratorTypes, doc, libraryCall, bodyBuild); } void GenericOp::build( OpBuilder &builder, OperationState &result, ValueRange inputs, - ValueRange outputBuffers, ArrayRef indexingMaps, + ValueRange outputs, ArrayRef indexingMaps, ArrayRef iteratorTypes, function_ref bodyBuild) { - build(builder, result, inputs, outputBuffers, indexingMaps, iteratorTypes, + build(builder, result, inputs, outputs, indexingMaps, iteratorTypes, /*doc=*/"", /*libraryCall=*/"", bodyBuild); } void GenericOp::build( OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes, - ValueRange inputs, ValueRange outputBuffers, ValueRange initTensors, - ArrayRef indexingMaps, ArrayRef iteratorTypes, + ValueRange inputs, ValueRange outputs, ArrayRef indexingMaps, + ArrayRef iteratorTypes, function_ref bodyBuild) { - build(builder, result, resultTensorTypes, inputs, outputBuffers, initTensors, - indexingMaps, iteratorTypes, + build(builder, result, resultTensorTypes, inputs, outputs, indexingMaps, + iteratorTypes, /*doc=*/"", /*libraryCall=*/"", bodyBuild); } void IndexedGenericOp::build( OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes, - ValueRange inputs, ValueRange outputBuffers, ValueRange initTensors, - ArrayRef indexingMaps, ArrayRef iteratorTypes, - StringRef doc, StringRef libraryCall, + ValueRange inputs, ValueRange outputs, ArrayRef indexingMaps, + ArrayRef iteratorTypes, StringRef doc, StringRef libraryCall, function_ref bodyBuild) { - build(builder, result, resultTensorTypes, inputs, outputBuffers, initTensors, + build(builder, result, resultTensorTypes, inputs, outputs, builder.getAffineMapArrayAttr(indexingMaps), builder.getStrArrayAttr(iteratorTypes), doc.empty() ? 
StringAttr() : builder.getStringAttr(doc), @@ -223,7 +216,7 @@ void IndexedGenericOp::build( unsigned nLoops = iteratorTypes.size(); SmallVector blockArgTypes(nLoops, builder.getIndexType()); - for (ValueRange container : {inputs, outputBuffers, initTensors}) + for (ValueRange container : {inputs, outputs}) for (Value v : container) blockArgTypes.push_back(v.getType().cast().getElementType()); @@ -237,32 +230,32 @@ void IndexedGenericOp::build( void IndexedGenericOp::build( OpBuilder &builder, OperationState &result, ValueRange inputs, - ValueRange outputBuffers, ArrayRef indexingMaps, + ValueRange outputs, ArrayRef indexingMaps, ArrayRef iteratorTypes, StringRef doc, StringRef libraryCall, function_ref bodyBuild) { - build(builder, result, TypeRange{}, inputs, outputBuffers, ValueRange{}, - indexingMaps, iteratorTypes, doc, libraryCall, bodyBuild); + build(builder, result, TypeRange{}, inputs, outputs, indexingMaps, + iteratorTypes, doc, libraryCall, bodyBuild); } void IndexedGenericOp::build( OpBuilder &builder, OperationState &result, ValueRange inputs, - ValueRange outputBuffers, ArrayRef indexingMaps, + ValueRange outputs, ArrayRef indexingMaps, ArrayRef iteratorTypes, function_ref bodyBuild) { - build(builder, result, inputs, outputBuffers, indexingMaps, iteratorTypes, + build(builder, result, inputs, outputs, indexingMaps, iteratorTypes, /*doc=*/"", /*libraryCall=*/"", bodyBuild); } void IndexedGenericOp::build( OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes, - ValueRange inputs, ValueRange outputBuffers, ValueRange initTensors, - ArrayRef indexingMaps, ArrayRef iteratorTypes, + ValueRange inputs, ValueRange outputs, ArrayRef indexingMaps, + ArrayRef iteratorTypes, function_ref bodyBuild) { - build(builder, result, resultTensorTypes, inputs, outputBuffers, initTensors, - indexingMaps, iteratorTypes, + build(builder, result, resultTensorTypes, inputs, outputs, indexingMaps, + iteratorTypes, /*doc=*/"", /*libraryCall=*/"", bodyBuild); } @@ -327,9 +320,8 @@ static ParseResult parseGenericOp(OpAsmParser &parser, OperationState &result) { dictAttr.getValue().end()); // Parsing is shared with named ops, except for the region. - SmallVector inputTypes, outputBufferTypes, initTensorTypes; - if (parseCommonStructuredOpParts(parser, result, inputTypes, - outputBufferTypes, initTensorTypes)) + SmallVector inputTypes, outputTypes; + if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes)) return failure(); // Optional attributes may be added. 
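With output_buffers and init_tensors collapsed into a single outputs list, callers of the builders above pass one ValueRange for all outputs. A minimal sketch of building a buffer-form linalg.generic through the new overload; the helper is hypothetical, expects rank-1 memrefs of matching element type and size, and assumes the region-builder callback keeps an (OpBuilder, Location, ValueRange) signature:

  // Sketch only; `emitCopyGeneric` is not part of the patch.
  #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
  #include "mlir/IR/Builders.h"

  using namespace mlir;

  static void emitCopyGeneric(OpBuilder &b, Location loc, Value in, Value out) {
    AffineMap id = AffineMap::getMultiDimIdentityMap(1, b.getContext());
    SmallVector<AffineMap, 2> maps = {id, id};
    SmallVector<StringRef, 1> iterators = {"parallel"};
    b.create<linalg::GenericOp>(
        loc, /*resultTensorTypes=*/TypeRange{},
        /*inputs=*/ValueRange(in), /*outputs=*/ValueRange(out), maps, iterators,
        [](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
          // args holds the input element followed by the output element;
          // yielding the input element makes this a copy.
          SmallVector<Value, 1> yieldValues(1, args[0]);
          nestedBuilder.create<linalg::YieldOp>(nestedLoc, yieldValues);
        });
  }
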
@@ -360,7 +352,7 @@ static ParseResult parseGenericOp(OpAsmParser &parser, OperationState &result) { static void getGenericEffectsImpl( SmallVectorImpl> &effects, - ValueRange results, ValueRange inputBuffers, ValueRange outputBuffers) { + ValueRange results, ValueRange inputBuffers, ValueRange outputs) { for (Value value : results) { effects.emplace_back(MemoryEffects::Allocate::get(), value, SideEffects::DefaultResource::get()); @@ -369,7 +361,7 @@ static void getGenericEffectsImpl( effects.emplace_back(MemoryEffects::Read::get(), value, SideEffects::DefaultResource::get()); } - for (Value value : outputBuffers) { + for (Value value : outputs) { effects.emplace_back(MemoryEffects::Read::get(), value, SideEffects::DefaultResource::get()); effects.emplace_back(MemoryEffects::Write::get(), value, @@ -391,65 +383,150 @@ void IndexedGenericOp::getEffects( getInputBuffers(), getOutputBuffers()); } -namespace { +LogicalResult mlir::linalg::detail::verifyStructuredOpInterface(Operation *op) { + LinalgOp linalgOp = cast(op); + // Expect at least one shaped operand. + // This means an op that constructs a tensor out of indices cannot be a + // LinalgOp at the moment. For now this will have to be a special op until we + // have output shape operands that are not tensors. + auto nShapedOperands = linalgOp.getNumShapedOperands(); + if (nShapedOperands == 0) + return linalgOp.emitOpError("expected at least 1 Shaped operand"); + if (failed(OpTrait::impl::verifyAtLeastNOperands(op, nShapedOperands))) + return failure(); + // Should have at least one output tensor per result tensor. + // Can also have outbut buffers that do not correspond to results. + if (op->getNumResults() > linalgOp.getNumOutputTensors()) + return op->emitError("unexpected #results > #outputs"); + + // All shaped operands must be indexed. + if (linalgOp.indexing_maps().size() != linalgOp.getNumShapedOperands()) + return linalgOp.emitOpError("expected the number of indexing_map (") + << linalgOp.indexing_maps().size() + << ") to be equal to the number of shaped operands (" + << linalgOp.getNumShapedOperands() << ")"; -template -struct BlockArgsVerifier { - static LogicalResult verify(GenericOpType op, Block &block); -}; + SmallVector indexingMaps; + indexingMaps.reserve(linalgOp.indexing_maps().size()); + for (auto en : llvm::enumerate(linalgOp.indexing_maps())) { + auto idx = en.index(); + auto m = en.value().template cast().getValue(); + indexingMaps.push_back(m); // Save reference to map for further checks. + auto shapedValue = linalgOp.getShapedType(idx); -template -LogicalResult BlockArgsVerifier::verify(GenericOpType op, - Block &block) { - auto nOperands = op.getNumOperands(); - if (block.getNumArguments() != nOperands) - return op.emitOpError("expected number of block arguments to match number " - "of operands"); + // Symbols disallowed. + if (m.getNumSymbols() != 0) + return linalgOp.emitOpError("unexpected symbols in indexing_map #") + << idx; - // Note: the number and type of yield values are checked in the YieldOp. - auto nInputViews = op.getNumInputs(); - for (unsigned i = 0; i < nOperands; ++i) { - auto viewType = op.getShapedType(i); - if (viewType.getElementType() != block.getArgument(i).getType()) - return op.emitOpError("expected block argument ") - << (i + 1) << " of the same type as elemental type of " - << ((i < nInputViews) ? "input " : "output ") - << "operand: " << viewType; + // Domain must be consistent. 
+ auto nLoops = linalgOp.getNumLoops(); + if (m.getNumDims() != nLoops) + return linalgOp.emitOpError("expected indexing_map #") + << idx << " to have " << nLoops + << " dim(s) to match the number of loops"; + + if (m.getNumResults() != shapedValue.getRank()) + return linalgOp.emitOpError("expected shaped value rank (") + << shapedValue.getRank() + << ") to match the result rank of indexing_map #" << idx << " (" + << m.getNumResults() << ")"; } - return success(); -} -template <> -LogicalResult BlockArgsVerifier::verify(IndexedGenericOp op, - Block &block) { - auto nInputViews = op.getNumInputs(); - auto nLoops = op.getNumLoops(); - auto nOperands = op.getNumOperands(); - if (block.getNumArguments() != nOperands + nLoops) - return op.emitOpError( - "expected number of block arguments to match number of operands + " - "number of loops"); + SmallVector redDims; + linalgOp.getReductionDims(redDims); + + // Simplifying assumption: either full tensor or full buffer mode. + // This allows simpler verification of output operands vs result types + // without premature tracking of which operand is what in mixed-mode. + // TODO: relax when mixed-mode needs to pass verification. + if (linalgOp.getNumOutputBuffers() > 0 && linalgOp.getNumOutputTensors() > 0) + return op->emitError("expected output operands to all have tensor type or " + "all have buffer type"); + + for (auto it : + llvm::zip(linalgOp.getOutputOpOperands(), op->getResultTypes())) { + if (!std::get<0>(it).get().getType().isa()) + continue; + if (std::get<0>(it).get().getType() != std::get<1>(it)) + return op->emitError("expected type of operand #") + << std::get<0>(it).getOperandNumber() << " (" + << std::get<0>(it).get().getType() << ")" + << " to match type of corresponding result (" << std::get<1>(it) + << ")"; + } + + // Output tensor indexing map may not depend on reduction indices. + for (OpOperand &opOperand : linalgOp.getOutputOpOperands()) { + AffineMap outputMap = linalgOp.getIndexingMap(opOperand.getOperandNumber()); + for (auto expr : outputMap.getResults()) { + for (auto dim : redDims) { + unsigned pos = dim.cast().getPosition(); + if (expr.isFunctionOfDim(pos)) { + std::string exprStr; + { + llvm::raw_string_ostream os(exprStr); + os << expr; + } + return op->emitError( + "unexpected output tensor expression in indexing map #") + << (opOperand.getOperandNumber() - linalgOp.getNumInputs()) + << " a.k.a '" << exprStr + << "' is function of reduction iterator 'd" << pos << "'"; + } + } + } + } + + // Named ops that are defined manually have a region builder but no region at + // this time. Assume the region is well-formed by specification. + // TODO: use linalg-ods-gen for all ops when we have enough expressive power. + if (linalgOp->getNumRegions() == 0) { + assert(!linalgOp.getRegionBuilder() && "regionBuilder but no region"); + return success(); + } + + auto ®ion = linalgOp->getRegion(0); + if (linalgOp->getNumRegions() > 1 || !llvm::hasSingleElement(region)) + return op->emitOpError("expected 1 region with 1 block"); + + if (!linalgOp.getShapesToLoopsMap()) + return op->emitOpError("expected the shape-to-loops map to be non-null"); + + // Simplifying assumption: bbargs match 1-1 with shape operands elemental + // types. + // TODO: once ranked shape types are plugged in, we may want to drop the + // corresponding bbargs, that can never be read from. This will be subject to + // consistency discussions (i.e. what to do with output tensors whose bbarg is + // not used). 
+ Block &block = linalgOp->getRegion(0).front(); + unsigned numBBIvs = linalgOp.getNumPayloadInductionVariables(); + + if (linalgOp.getNumShapedOperands() + numBBIvs != block.getNumArguments()) + return op->emitError("expected as many non-induction variable region " + "arguments as the number of shaped operands"); // Note: the number and type of yield values are checked in the YieldOp. - for (unsigned i = 0; i < nLoops; ++i) + for (unsigned i = 0; i < numBBIvs; ++i) if (!block.getArgument(i).getType().isIndex()) - return op.emitOpError("expected block argument ") - << (i + 1) << " to be an index"; - - for (unsigned i = 0; i < nOperands; ++i) { - unsigned memrefArgIndex = i + nLoops; - auto viewType = op.getShapedType(i); - if (viewType.getElementType() != - block.getArgument(memrefArgIndex).getType()) - return op.emitOpError("expected block argument ") - << (memrefArgIndex + 1) - << " of the same type as elemental type of " - << ((i < nInputViews) ? "input " : "output ") - << "operand: " << viewType; + return op->emitOpError("expected index block argument #") << i; + + unsigned idx = 0; + for (auto it : llvm::zip(linalgOp.getShapedOperandTypes(), + block.getArguments().drop_front(numBBIvs))) { + if (std::get<0>(it).getElementType() != std::get<1>(it).getType()) + return op->emitError("expected type of bb argument #") + << (idx + numBBIvs) << " (" << std::get<1>(it).getType() << ")" + << " to match element type of corresponding shaped operand (" + << std::get<0>(it).getElementType() << ")"; + ++idx; } + return success(); } +namespace { + template struct AnnotationsVerifier { static LogicalResult verify(GenericOpType op) { return success(); } @@ -465,7 +542,7 @@ LogicalResult AnnotationsVerifier::verify(GenericOp op) { return op.emitOpError("expected sparse annotations on tensors only"); if (op.getNumOutputs() != 1) return op.emitOpError("expected single output tensor"); - unsigned numTensors = op.getNumInputsAndOutputs(); + unsigned numTensors = op.getNumShapedOperands(); if (sparseAttr.size() != numTensors) return op.emitOpError("expected one sparse annotation for each tensor"); for (unsigned t = 0; t < numTensors; t++) { @@ -497,49 +574,6 @@ LogicalResult AnnotationsVerifier::verify(GenericOp op) { template static LogicalResult verifyGenericOp(GenericOpType op) { - auto nLoops = op.getNumLoops(); - - if (op.inputs().size() + op.output_buffers().size() + - op.init_tensors().size() + op.getNumResults() == - 0) - return op.emitOpError("expected at least 1 Shaped operand or return"); - - auto ®ion = op.region(); - if (!llvm::hasSingleElement(region)) - return op.emitOpError("expected region with 1 block"); - if (failed(BlockArgsVerifier::verify(op, region.front()))) - return failure(); - - if (op.indexing_maps().size() != op.getNumInputsAndOutputs()) - return op.emitOpError("expected the number of indexing_map (") - << op.indexing_maps().size() - << ") to be equal to the number of inputs and outputs (" - << op.getNumInputsAndOutputs() << ")"; - - SmallVector indexingMaps; - indexingMaps.reserve(op.indexing_maps().size()); - for (auto en : llvm::enumerate(op.indexing_maps())) { - auto idx = en.index(); - auto m = en.value().template cast().getValue(); - indexingMaps.push_back(m); // Save reference to map for further checks. 
- auto view = op.getShapedType(idx); - - if (m.getNumSymbols() != 0) - return op.emitOpError("unexpected symbols in indexing_map #") << idx; - - if (m.getNumDims() != nLoops) - return op.emitOpError("expected indexing_map #") - << idx << " to have " << nLoops - << " dim(s) to match the number of loops"; - - if (m.getNumResults() != view.getRank()) - return op.emitOpError("expected indexing_map #") - << idx << " results to match view rank: " << view; - } - - if (!op.getShapesToLoopsMap()) - return op.emitOpError("expected the shape-to-loops map to be non-null"); - if (failed(AnnotationsVerifier::verify(op))) return failure(); @@ -1380,8 +1414,6 @@ static LogicalResult verify(ConvOp op) { return op.emitOpError("expects memref elemental types to match"); if (oType.getRank() != iType.getRank() || oType.getRank() != fType.getRank()) return op.emitOpError("expects memref ranks to match"); - if (oType.getRank() <= 2) - return op.emitOpError("expects memref ranks to be greater than 2"); if (auto strides = op.strides()) { if (failed( verifyStrideOrDilation(op, strides->getValue(), /*isStride=*/true))) @@ -1591,13 +1623,12 @@ OpFoldResult TensorReshapeOp::fold(ArrayRef operands) { template static void buildNamedStructuredOpRegionAndAttributesImpl( OpBuilder &opBuilder, Region ®ion, TypeRange inputTypes, - TypeRange outputBufferTypes, TypeRange initTensorTypes, - TypeRange resultTypes, + TypeRange outputTypes, std::function errorHandler) { // TODO: atm all operands go through getElementTypeOrSelf, // reconsider when we have evidence we need to. SmallVector argTypes; - for (auto containers : {inputTypes, outputBufferTypes, resultTypes}) + for (auto containers : {inputTypes, outputTypes}) for (auto t : containers) argTypes.push_back(getElementTypeOrSelf(t)); @@ -1622,13 +1653,11 @@ template void buildNamedStructuredOpRegionAndAttributes(OpBuilder &opBuilder, OperationState &result, TypeRange inputTypes, - TypeRange outputBufferTypes, - TypeRange initTensorTypes, - TypeRange resultTypes) { + TypeRange outputTypes) { Region ®ion = *result.addRegion(); buildNamedStructuredOpRegionAndAttributesImpl( - opBuilder, region, inputTypes, outputBufferTypes, initTensorTypes, - resultTypes, [&](unsigned expected, unsigned actual) { + opBuilder, region, inputTypes, outputTypes, + [&](unsigned expected, unsigned actual) { llvm::errs() << "region expects " << expected << " args, got " << actual; assert(expected != actual && "incorrect number of arguments"); @@ -1638,13 +1667,12 @@ void buildNamedStructuredOpRegionAndAttributes(OpBuilder &opBuilder, template static ParseResult parseNamedStructuredOpRegion(OpAsmParser &parser, Region ®ion, - TypeRange inputTypes, TypeRange outputBufferTypes, - TypeRange initTensorTypes, TypeRange resultTypes) { + TypeRange inputTypes, TypeRange outputTypes) { ParseResult res = success(); OpBuilder opBuilder(parser.getBuilder().getContext()); buildNamedStructuredOpRegionAndAttributesImpl( - opBuilder, region, inputTypes, outputBufferTypes, initTensorTypes, - resultTypes, [&](unsigned expected, unsigned actual) { + opBuilder, region, inputTypes, outputTypes, + [&](unsigned expected, unsigned actual) { res = parser.emitError(parser.getCurrentLocation(), llvm::formatv("region expects {0} args, got {1}", expected, actual)); @@ -1664,12 +1692,9 @@ parseNamedStructuredOpResults(OpAsmParser &parser, static ParseResult parseCommonStructuredOpParts(OpAsmParser &parser, OperationState &result, SmallVectorImpl &inputTypes, - SmallVectorImpl &outputBufferTypes, - SmallVectorImpl &initTensorTypes) { 
- llvm::SMLoc inputsOperandsLoc, outputBuffersOperandsLoc, - initTensorsOperandsLoc; - SmallVector inputsOperands, - outputBuffersOperands, initTensorsOperands; + SmallVectorImpl &outputTypes) { + llvm::SMLoc inputsOperandsLoc, outputsOperandsLoc; + SmallVector inputsOperands, outputsOperands; parser.parseOptionalAttrDict(result.attributes); @@ -1684,41 +1709,30 @@ parseCommonStructuredOpParts(OpAsmParser &parser, OperationState &result, } if (succeeded(parser.parseOptionalKeyword("outs"))) { - outputBuffersOperandsLoc = parser.getCurrentLocation(); - if (parser.parseLParen() || - parser.parseOperandList(outputBuffersOperands) || - parser.parseColonTypeList(outputBufferTypes) || parser.parseRParen()) - return failure(); - } - if (succeeded(parser.parseOptionalKeyword("init"))) { - initTensorsOperandsLoc = parser.getCurrentLocation(); - if (parser.parseLParen() || parser.parseOperandList(initTensorsOperands) || - parser.parseColonTypeList(initTensorTypes) || parser.parseRParen()) + outputsOperandsLoc = parser.getCurrentLocation(); + if (parser.parseLParen() || parser.parseOperandList(outputsOperands) || + parser.parseColonTypeList(outputTypes) || parser.parseRParen()) return failure(); } if (parser.resolveOperands(inputsOperands, inputTypes, inputsOperandsLoc, result.operands) || - parser.resolveOperands(outputBuffersOperands, outputBufferTypes, - outputBuffersOperandsLoc, result.operands) || - parser.resolveOperands(initTensorsOperands, initTensorTypes, - initTensorsOperandsLoc, result.operands)) + parser.resolveOperands(outputsOperands, outputTypes, outputsOperandsLoc, + result.operands)) return failure(); result.addAttribute("operand_segment_sizes", parser.getBuilder().getI32VectorAttr( {static_cast(inputsOperands.size()), - static_cast(outputBuffersOperands.size()), - static_cast(initTensorsOperands.size())})); + static_cast(outputsOperands.size())})); return success(); } template static ParseResult parseNamedStructuredOp(OpAsmParser &parser, OperationState &result) { - SmallVector inputTypes, outputBufferTypes, initTensorTypes; - if (parseCommonStructuredOpParts(parser, result, inputTypes, - outputBufferTypes, initTensorTypes)) + SmallVector inputTypes, outputTypes; + if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes)) return failure(); // TODO: consider merging results parsing into region parsing. 
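The parser above now records only two operand segments. A small sketch of the matching builder-side bookkeeping, using only APIs that appear in this patch; the helper name is illustrative:

  // Sketch only; `addStructuredOperands` is a hypothetical helper mirroring
  // the two-segment bookkeeping done by parseCommonStructuredOpParts.
  #include "mlir/IR/Builders.h"
  #include "mlir/IR/OperationSupport.h"

  using namespace mlir;

  static void addStructuredOperands(OpBuilder &builder, OperationState &state,
                                    ValueRange inputs, ValueRange outputs) {
    state.addOperands(inputs);
    state.addOperands(outputs);
    // Only two segments remain: inputs and outputs. The former third segment
    // for init tensors is gone.
    state.addAttribute(
        "operand_segment_sizes",
        builder.getI32VectorAttr({static_cast<int32_t>(inputs.size()),
                                  static_cast<int32_t>(outputs.size())}));
  }
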
@@ -1730,8 +1744,7 @@ static ParseResult parseNamedStructuredOp(OpAsmParser &parser, std::unique_ptr region = std::make_unique(); if (parseNamedStructuredOpRegion( - parser, *region, inputTypes, outputBufferTypes, initTensorTypes, - outputTensorsTypes)) + parser, *region, inputTypes, outputTypes)) return failure(); result.addRegion(std::move(region)); @@ -1750,12 +1763,8 @@ static void printCommonStructuredOpParts(OpAsmPrinter &p, NamedStructuredOpType op) { if (!op.inputs().empty()) p << " ins(" << op.inputs() << " : " << op.inputs().getTypes() << ")"; - if (!op.output_buffers().empty()) - p << " outs(" << op.output_buffers() << " : " - << op.output_buffers().getTypes() << ")"; - if (!op.init_tensors().empty()) - p << " init(" << op.init_tensors() << " : " << op.init_tensors().getTypes() - << ") "; + if (!op.outputs().empty()) + p << " outs(" << op.outputs() << " : " << op.outputs().getTypes() << ")"; } template @@ -1789,7 +1798,7 @@ struct EraseDeadLinalgOp : public RewritePattern { auto linalgOp = dyn_cast(op); if (!linalgOp) return failure(); - for (Value v : linalgOp.getInputsAndOutputBuffers()) { + for (Value v : linalgOp.getShapedOperands()) { // Linalg "inputs" may be either tensor or memref type. // tensor<0xelt_type> is a convention that may not always mean // "0 iterations". Only erase in cases we see memref<...x0x...>. @@ -1836,11 +1845,8 @@ struct FoldTensorCastOp : public RewritePattern { newOperands.push_back( canFoldIntoConsumerOp(tensorCastOp) ? tensorCastOp.source() : v); } - // Output buffers are memrefs, they don't fold. - newOperands.append(linalgOp.getOutputBuffers().begin(), - linalgOp.getOutputBuffers().end()); // Init tensors may fold, in which case the resultType must also change. - for (Value v : linalgOp.getInitTensors()) { + for (Value v : linalgOp.getOutputs()) { auto tensorCastOp = v.getDefiningOp(); bool fold = canFoldIntoConsumerOp(tensorCastOp); newOperands.push_back(fold ? tensorCastOp.getOperand() : v); @@ -1904,8 +1910,7 @@ struct DeduplicateInputs : public RewritePattern { for (auto v : llvm::enumerate(linalgOp.getInputs())) if (canonicalInputIndices[v.index()] == static_cast(v.index())) newOperands.push_back(v.value()); - llvm::append_range(newOperands, linalgOp.getOutputBuffers()); - llvm::append_range(newOperands, linalgOp.getInitTensors()); + llvm::append_range(newOperands, linalgOp.getOutputs()); llvm::append_range(newOperands, linalgOp.getAssumedNonShapedOperands()); // Clone the old op with new operands. @@ -1929,11 +1934,8 @@ struct DeduplicateInputs : public RewritePattern { newLinalgOp.setNumInputs(canonicalInput.size()); // linalg.indexed_generic payloads have additional arguments prepended to - // the block arg list. The number of such args is one per dimension of the - // iteration space. - int bbArgBaseOffset = 0; - if (isa(op)) - bbArgBaseOffset = newIndexingMaps[0].getNumInputs(); + // the block arg list. + int bbArgBaseOffset = newLinalgOp.getNumPayloadInductionVariables(); // Repair the payload entry block by RAUW'ing redundant arguments and // erasing them. 
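DeduplicateInputs above now derives the payload block-argument offset from getNumPayloadInductionVariables() instead of inspecting the indexing maps. A short sketch of that same operand-to-block-argument mapping as a standalone helper; the helper itself is hypothetical and relies only on interface methods from this patch:

  // Sketch only.
  #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
  #include <cassert>

  using namespace mlir;
  using namespace mlir::linalg;

  static BlockArgument getPayloadArgForOperand(LinalgOp linalgOp,
                                               unsigned operandIndex) {
    assert(operandIndex < linalgOp.getNumShapedOperands() &&
           "expected a shaped operand index");
    // linalg.indexed_generic prepends one index block argument per loop;
    // linalg.generic prepends none.
    unsigned offset = linalgOp.getNumPayloadInductionVariables();
    Block &body = linalgOp->getRegion(0).front();
    return body.getArgument(offset + operandIndex);
  }
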
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp index b36d74bad3fbc..a3ab6f45b26eb 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -21,21 +21,22 @@ using namespace ::mlir; using namespace ::mlir::linalg; -static Value maybeConvertToIndex(Location loc, Value val, OpBuilder &b) { - if (val.getType().isIndex()) - return val; - return b.create(loc, val, b.getIndexType()); -} - -static Value cloneMemref(Location loc, Value memref, OpBuilder &b) { - auto memrefType = memref.getType().cast(); +static SmallVector getDynOperands(Location loc, Value val, + OpBuilder &b) { SmallVector dynOperands; - for (auto dim : llvm::enumerate(memrefType.getShape())) { + auto shapedType = val.getType().cast(); + for (auto dim : llvm::enumerate(shapedType.getShape())) { if (dim.value() == TensorType::kDynamicSize) { - dynOperands.push_back(b.create(loc, memref, dim.index())); + dynOperands.push_back(b.create(loc, val, dim.index())); } } - auto alloc = b.create(loc, memrefType, dynOperands); + return dynOperands; +} + +static Value cloneMemref(Location loc, Value memref, OpBuilder &b) { + auto memrefType = memref.getType().cast(); + auto alloc = + b.create(loc, memrefType, getDynOperands(loc, memref, b)); b.create(loc, memref, alloc); return alloc; } @@ -48,6 +49,7 @@ allocateBuffersForResults(Location loc, LinalgOp linalgOp, SmallVector loopRanges; // Allocate a buffer for every tensor result. + assert(linalgOp.getNumOutputs() == linalgOp->getNumResults()); for (auto en : llvm::enumerate(linalgOp->getResultTypes())) { size_t resultIndex = en.index(); Type resultType = en.value(); @@ -60,46 +62,26 @@ allocateBuffersForResults(Location loc, LinalgOp linalgOp, } auto tensorShape = tensorType.getShape(); auto memrefType = MemRefType::get(tensorShape, tensorType.getElementType()); + Value resultTensor = adaptor.outputs()[resultIndex]; - // Allocate buffers for init tensors that are assumed to fold onto the first - // results. - // TODO: update this assumption because the reality is more complex - // under linalg on tensor based transformations. - bool hasInitTensor = resultIndex < linalgOp.getNumInitTensors(); - if (hasInitTensor) { - resultBuffers.push_back( - cloneMemref(loc, adaptor.init_tensors()[resultIndex], b)); + // Clone output buffers whose value is actually used. + if (linalgOp.payloadUsesValueFromOutputOperandIndex(resultIndex)) { + resultBuffers.push_back(cloneMemref(loc, resultTensor, b)); continue; } + if (auto alloc = resultTensor.getDefiningOp()) { + resultBuffers.push_back(resultTensor); + continue; + } // Allocate buffers for statically-shaped results. if (memrefType.hasStaticShape()) { resultBuffers.push_back(b.create(loc, memrefType)); continue; } - // Perform a naive shape inference for the dynamically-shaped results. - // Extract the required element out of the vector. 
- SmallVector dynOperands; - auto resultIndexingMap = linalgOp.getOutputIndexingMap(resultIndex); - for (auto shapeElement : llvm::enumerate(tensorType.getShape())) { - if (loopRanges.empty()) - loopRanges = linalgOp.createLoopRanges(b, loc); - if (shapeElement.value() != ShapedType::kDynamicSize) - continue; - AffineExpr expr = resultIndexingMap.getResult(shapeElement.index()); - switch (expr.getKind()) { - case AffineExprKind::DimId: { - int64_t loopIndex = expr.cast().getPosition(); - Value size = maybeConvertToIndex(loc, loopRanges[loopIndex].size, b); - dynOperands.push_back(size); - break; - } - default: - return failure(); - } - } - resultBuffers.push_back(b.create(loc, memrefType, dynOperands)); + resultBuffers.push_back(b.create( + loc, memrefType, getDynOperands(loc, resultTensor, b))); } return success(); } @@ -119,8 +101,7 @@ finalizeBufferAllocationForGenericOp(ConversionPatternRewriter &rewriter, genericOp.getLoc(), /*resultTensorTypes=*/llvm::None, /*inputs=*/inputs, - /*outputBuffers=*/outputs, - /*initTensors=*/llvm::None, genericOp.indexing_maps(), + /*outputs=*/outputs, genericOp.indexing_maps(), genericOp.iterator_types(), genericOp.docAttr(), genericOp.library_callAttr(), genericOp.sparseAttr()); @@ -130,10 +111,6 @@ finalizeBufferAllocationForGenericOp(ConversionPatternRewriter &rewriter, Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), oldBlock->getArgumentTypes()); - // Add the result arguments to the new block. - for (Value v : ValueRange(outputs).drop_front(genericOp.getNumInitTensors())) - newBlock->addArgument(v.getType().cast().getElementType()); - // Clone the body of the old block to the new block. BlockAndValueMapping mapping; mapping.map(oldBlock->getArguments(), newBlock->getArguments()); @@ -159,12 +136,8 @@ static void finalizeBufferAllocation(ConversionPatternRewriter &rewriter, newOperands.append(outputs.begin(), outputs.end()); auto otherOperands = linalgOp.getAssumedNonShapedOperands(); newOperands.append(otherOperands.begin(), otherOperands.end()); - LinalgOp res = cast(linalgOp.clone(rewriter, linalgOp.getLoc(), - /*resultTypes=*/ArrayRef{}, - newOperands)); - // Need to mutate the operands_segment_sizes in the resulting op. - res.setNumOutputBuffers(outputs.size()); - res.setNumInitTensors(0); + linalgOp.clone(rewriter, linalgOp.getLoc(), + /*resultTypes=*/ArrayRef{}, newOperands); // Replace the results of the old op with the new output buffers. rewriter.replaceOp(linalgOp, outputs); } @@ -174,6 +147,24 @@ static void finalizeBufferAllocation(ConversionPatternRewriter &rewriter, //===----------------------------------------------------------------------===// namespace { + +/// Generic conversion pattern that matches any LinalgOp. This avoids template +/// instantiating one pattern for each LinalgOp. +class BufferizeInitTensorOp : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(InitTensorOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + linalg::InitTensorOpAdaptor adaptor(operands, op->getAttrDictionary()); + rewriter.replaceOpWithNewOp( + op, getTypeConverter()->convertType(op.getType()).cast(), + adaptor.sizes()); + return success(); + } +}; + /// Generic conversion pattern that matches any LinalgOp. This avoids template /// instantiating one pattern for each LinalgOp. 
class BufferizeAnyLinalgOp : public ConversionPattern { @@ -190,13 +181,12 @@ class BufferizeAnyLinalgOp : public ConversionPattern { return failure(); // We abuse the GenericOpAdaptor here. - // TODO: Manually create an Adaptor that captures inputs, output_buffers and - // init_tensors for all linalg::LinalgOp interface ops. + // TODO: Manually create an Adaptor that captures inputs and outputs for all + // linalg::LinalgOp interface ops. linalg::GenericOpAdaptor adaptor(operands, op->getAttrDictionary()); Location loc = linalgOp.getLoc(); - SmallVector newOutputBuffers(adaptor.output_buffers().begin(), - adaptor.output_buffers().end()); + SmallVector newOutputBuffers; if (failed(allocateBuffersForResults(loc, linalgOp, adaptor, newOutputBuffers, rewriter))) { @@ -327,7 +317,7 @@ struct LinalgBufferizePass : public LinalgBufferizeBase { // Mark all Standard operations legal. target.addLegalDialect(); - target.addIllegalOp(); + target.addIllegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { @@ -354,10 +344,11 @@ void mlir::linalg::populateLinalgBufferizePatterns( OwningRewritePatternList &patterns) { patterns.insert(typeConverter); // TODO: Drop this once tensor constants work in standard. + // clang-format off patterns.insert< - // clang-format off + BufferizeInitTensorOp, SubTensorOpConverter, SubTensorInsertOpConverter - // clang-format on - >(typeConverter, context); + >(typeConverter, context); + // clang-format on } diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp index bf488f827f898..8d09d58b9d7a9 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -189,7 +189,7 @@ struct FoldUnitDimLoops : public OpRewritePattern { if (!invertedMap) return failure(); SmallVector dims; - for (ShapedType shapedType : op.getInputOutputShapedTypes()) + for (ShapedType shapedType : op.getShapedOperandTypes()) dims.append(shapedType.getShape().begin(), shapedType.getShape().end()); DenseSet unitDims; ArrayAttr iteratorTypes = op.iterator_types(); @@ -295,7 +295,7 @@ struct ReplaceUnitExtentTensors : public OpRewritePattern { LogicalResult matchAndRewrite(GenericOpTy op, PatternRewriter &rewriter) const override { // TODO: support init_tensors and reductions. - if (!op.hasTensorSemantics() || !op.init_tensors().empty()) + if (!op.hasTensorSemantics() || op.getNumInitTensors() != 0) return failure(); MLIRContext *context = rewriter.getContext(); @@ -306,7 +306,7 @@ struct ReplaceUnitExtentTensors : public OpRewritePattern { SmallVector newInputOutputTypes; bool doCanonicalization = false; for (auto it : - llvm::zip(op.getIndexingMaps(), op.getInputOutputShapedTypes())) { + llvm::zip(op.getIndexingMaps(), op.getShapedOperandTypes())) { auto replacementInfo = replaceUnitExtents( std::get<0>(it), std::get<1>(it).template cast(), context); @@ -342,19 +342,16 @@ struct ReplaceUnitExtentTensors : public OpRewritePattern { }; SmallVector newInputs = insertReshapes(op.inputs()); - SmallVector newOutputBuffers = - insertReshapes(op.output_buffers()); - SmallVector newInitTensors = insertReshapes(op.init_tensors()); + SmallVector newOutputs = insertReshapes(op.outputs()); - // If any result type change, insert a reshape to convert from the original + // If any result type changes, insert a reshape to convert from the original // type to the new type. 
SmallVector resultTypes; resultTypes.reserve(op.getNumResults()); for (unsigned i : llvm::seq(0, op.getNumResults())) resultTypes.push_back(newInputOutputTypes[i + op.getNumInputs()]); GenericOpTy replacementOp = rewriter.create( - loc, resultTypes, newInputs, newOutputBuffers, newInitTensors, - newIndexingMaps, + loc, resultTypes, newInputs, newOutputs, newIndexingMaps, llvm::to_vector<4>( op.iterator_types().template getAsValueRange())); rewriter.inlineRegionBefore(op.region(), replacementOp.region(), @@ -364,7 +361,7 @@ struct ReplaceUnitExtentTensors : public OpRewritePattern { // the original shape. SmallVector resultReplacements; for (auto result : llvm::enumerate(replacementOp.getResults())) { - unsigned index = result.index() + replacementOp.getNumOperands(); + unsigned index = result.index() + replacementOp.getNumInputs(); RankedTensorType origResultType = op.getResult(result.index()) .getType() .template cast(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp index 8ee1b389dee83..ada9f8c02b890 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp @@ -25,6 +25,61 @@ static bool isElementwiseMappableOpOnRankedTensors(Operation *op) { [](Type type) { return type.isa(); }); } +/// Given `op` assumed `isElementwiseMappableOpOnRankedTensors`, iterate over +/// the result types and return a list of values such that, for each result type +/// `t` and value `v` at the same index `idx`: +/// 1. `v.getType() == t` +/// 2. If an operand of `op` has type `t`, let `operand_first` be the first +/// such operand. Then`v == operand_first`. +/// 3. Otherwise, v is a newly created `linalg::InitTensorOp` with: +/// a. Static and dynamic dims extracted from the first operand of `op`. +/// b. Elemental type equal to the elemental type of `t`. +/// +/// This is sufficient because ElementwiseMappable guarantees that "The static +/// types of all vector (resp. tensor) operands and results must have the same +/// shape". +static SmallVector +getOrCreateOperandsMatchingResultTypes(OpBuilder &b, Operation *op) { + assert(isElementwiseMappableOpOnRankedTensors(op)); + Location loc = op->getLoc(); + ValueRange operands = op->getOperands(); + TypeRange rankedTensorTypes = op->getResultTypes(); + SmallVector res; + res.reserve(rankedTensorTypes.size()); + for (Type t : rankedTensorTypes) { + // Try to find an operand with type matching the result tensor. + bool found = false; + for (Value v : operands) { + if (v.getType() == t) { + found = true; + res.push_back(v); + break; + } + } + if (found) + continue; + + // Extract static / dynamic shape mix from the first operand. + Value firstOperand = operands.front(); + auto rankedTensorType = t.cast(); + SmallVector dynamicShape; + SmallVector staticShape; + dynamicShape.reserve(rankedTensorType.getRank()); + staticShape.reserve(rankedTensorType.getRank()); + unsigned idx = 0; + for (auto shape : rankedTensorType.getShape()) { + staticShape.push_back(shape); + if (rankedTensorType.isDynamicDim(idx)) + dynamicShape.push_back(b.create(loc, firstOperand, idx)); + ++idx; + } + // Create init tensor. 
+ res.push_back(b.create( + loc, dynamicShape, staticShape, rankedTensorType.getElementType())); + } + return res; +} + namespace { struct ConvertAnyElementwiseMappableOpOnRankedTensors : public RewritePattern { ConvertAnyElementwiseMappableOpOnRankedTensors() @@ -41,18 +96,19 @@ struct ConvertAnyElementwiseMappableOpOnRankedTensors : public RewritePattern { rewriter.getMultiDimIdentityMap(rank)); SmallVector iteratorTypes(rank, getParallelIteratorTypeName()); + auto outputs = getOrCreateOperandsMatchingResultTypes(rewriter, op); rewriter.replaceOpWithNewOp( op, /*resultTensorTypes=*/op->getResultTypes(), /*inputs=*/op->getOperands(), - /*outputBuffers=*/ValueRange(), - /*initTensors=*/ValueRange(), + /*outputs=*/outputs, /*indexingMaps=*/indexingMaps, /*iteratorTypes=*/iteratorTypes, /*bodyBuilder=*/ [&](OpBuilder &builder, Location loc, ValueRange regionArgs) { OperationState state(loc, op->getName()); state.addAttributes(op->getAttrs()); - state.addOperands(regionArgs); + // Only take the input operands in the cloned elementwise op. + state.addOperands(regionArgs.take_front(op->getNumOperands())); auto resultTypes = llvm::to_vector<6>( llvm::map_range(op->getResultTypes(), [](Type type) { return type.cast().getElementType(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index d9ea7d8ccb294..b525108d22ab6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -169,8 +169,7 @@ getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth, auto maps = op.indexing_maps(); // Iterate over the inputs and outputs in order. // Extract the subranges from the linearized ranges. - SmallVector ios(op.getInputsAndOutputBuffers()); - for (auto en : llvm::enumerate(ios)) { + for (auto en : llvm::enumerate(op.getShapedOperands())) { // The method `getRangeFromOperandShape` requires using SubViewOp or // SubTensorOps. If the value isnt defined from there continue. // todo: The method should be adapted to get the values from @@ -381,6 +380,8 @@ static bool isSameSubView(Value a, Value b) { static Optional findFusableProducer(LinalgOp consumer, unsigned consumerIdx, const LinalgDependenceGraph &dependenceGraph) { + assert(consumer.hasBufferSemantics() && "revisit usage of shaped operand"); + // Only consider RAW and WAW atm. for (auto depType : { LinalgDependenceGraph::DependenceType::RAW, @@ -390,26 +391,25 @@ findFusableProducer(LinalgOp consumer, unsigned consumerIdx, dependenceGraph.getDependencesInto(consumer, depType), [consumerIdx]( LinalgDependenceGraph::LinalgDependenceGraphElem elem) { - return elem.indexingOpView.operandIndex == consumerIdx; + return elem.indexingOpView->getOperandNumber() == consumerIdx; })) { - auto producer = cast(dependence.dependentOpView.op); // Check that the dependence is indeed on the input `consumerIdx` view. - auto consumedView = - consumer.getBuffer(dependence.indexingOpView.operandIndex); - if (!isSameSubView(consumer.getBuffer(consumerIdx), consumedView)) + Value consumedView = dependence.indexingOpView->get(); + if (!isSameSubView(consumer.getShapedOperand(consumerIdx), consumedView)) continue; // Consumer consumes this view, `isStructurallyFusableProducer` also // checks whether it is a strict subview of the producer view. 
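      // The dependence endpoints are now queried through standard OpOperand
      // accessors instead of the old {op, operandIndex} pair. Hypothetical
      // usage sketch, with `operand` standing for either endpoint of a
      // dependence (assuming the endpoints are OpOperand pointers, as the
      // accessors used below suggest):
      //   OpOperand *operand = dependence.dependentOpView;
      //   Operation *owner = operand->getOwner();         // op holding the use
      //   Value view = operand->get();                    // the SSA value itself
      //   unsigned pos = operand->getOperandNumber();     // index in the operand list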
- auto producedView = - producer.getBuffer(dependence.dependentOpView.operandIndex); + auto producer = cast(dependence.dependentOpView->getOwner()); + Value producedView = dependence.dependentOpView->get(); LLVM_DEBUG(llvm::dbgs() << "\n" << LinalgDependenceGraph::getDependenceTypeStr(depType) - << "producer: " << *producer.getOperation() - << " view: " << producedView << " output index: " - << dependence.dependentOpView.operandIndex - + << "producer: " << *dependence.dependentOpView->getOwner() + << " view: " << dependence.dependentOpView->get() + << " output index: " + << dependence.dependentOpView->getOperandNumber() - producer.getNumInputs() << "\n"); (void)producedView; @@ -433,13 +433,15 @@ mlir::linalg::fuseProducerOfBuffer(OpBuilder &b, LinalgOp consumer, if (!fusableDependence) return {}; - LinalgOp producerOp = cast(fusableDependence->dependentOpView.op); + LinalgOp producerOp = + cast(fusableDependence->dependentOpView->getOwner()); // If producer is already in the same block as consumer, we are done. if (consumer->getBlock() == producerOp->getBlock()) return {}; - unsigned producerIdx = fusableDependence->dependentOpView.operandIndex - - producerOp.getNumInputs(); + unsigned producerIdx = + fusableDependence->dependentOpView->getOperandNumber() - + producerOp.getNumInputs(); Value consumerView = consumer.getShapedOperand(consumerIdx); // Must be a subview or a slice to guarantee there are loops we can fuse @@ -548,12 +550,12 @@ static AffineMap pruneReductionDimsFromMap(ArrayRef iteratorTypes, /// inverse(producerIndexMap).compose(consumerIndexMap) static Optional getConsumerLoopToProducerLoopMap( LinalgDependenceGraph::LinalgDependenceGraphElem dependence) { - auto producer = cast(dependence.dependentOpView.op); + auto producer = cast(dependence.dependentOpView->getOwner()); AffineMap producerIndexingMap = - producer.getIndexingMap(dependence.dependentOpView.operandIndex); - auto consumer = cast(dependence.indexingOpView.op); + producer.getIndexingMap(dependence.dependentOpView->getOperandNumber()); + auto consumer = cast(dependence.indexingOpView->getOwner()); AffineMap consumerIndexingMap = - consumer.getIndexingMap(dependence.indexingOpView.operandIndex); + consumer.getIndexingMap(dependence.indexingOpView->getOperandNumber()); AffineMap prunedProducerIndexingMap = pruneReductionDimsFromMap( producer.iterator_types().getValue(), producerIndexingMap); @@ -733,14 +735,14 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences( DenseMap fusedProducerIndexingMap; for (LinalgOp op : reverse(ops)) { for (auto operandIndex : - llvm::seq(0, op.getNumInputsAndOutputBuffers())) { + llvm::seq(0, op.getNumShapedOperands())) { Optional fusableDependence = findFusableProducer(op, operandIndex, dependenceGraph); if (!fusableDependence) continue; LinalgOp producerOp = - cast(fusableDependence->dependentOpView.op); + cast(fusableDependence->dependentOpView->getOwner()); // Do not fuse dependences that are to operations not in the same basic // block. This avoid moving fused operations across loops that might // themselves carry dependency making the fusion illegal. @@ -750,7 +752,8 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences( } // Make sure that the indexing map of the view used for fusion in the // producer is a projected permutation. 
- unsigned producerIdx = fusableDependence->dependentOpView.operandIndex; + unsigned producerIdx = + fusableDependence->dependentOpView->getOperandNumber(); AffineMap producerMap = producerOp.getIndexingMap(producerIdx); if (!producerMap.isProjectedPermutation()) { op.emitRemark( @@ -760,7 +763,8 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences( return FusableOpDependencesTy{}; } - unsigned consumerIdx = fusableDependence->indexingOpView.operandIndex; + unsigned consumerIdx = + fusableDependence->indexingOpView->getOperandNumber(); AffineMap consumerMap = op.getIndexingMap(consumerIdx); if (!consumerMap.isProjectedPermutation()) { op.emitRemark( diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp index 22e03c1e2f92a..b1ea07309b4f8 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -128,7 +128,9 @@ static void generateFusedTensorOpRegion(PatternRewriter &rewriter, for (auto consumerArg : llvm::enumerate(consumerBlock.getArguments())) { if (consumerArg.index() == consumerIdx + numConsumerIndices) { // Map the arguments for the args from the producer. - for (auto producerArg : llvm::enumerate(producerBlock.getArguments())) { + for (auto producerArg : + llvm::enumerate(producerBlock.getArguments().take_front( + producer.getNumInputs() + numProducerIndices))) { // If producer is an indexed_generic op, map the indices from consumer // loop to producer loop (because the fusedOp is built based on // consumer's perspective). @@ -213,7 +215,6 @@ fuseTensorOpsImpl(LinalgOp producer, LinalgOp consumer, unsigned consumerIdx, consumerIndexMaps.end()); // Generate the fused op. - // Tensor-level fusion is only on ops without initTensors and outputBuffers. LinalgOp fusedOp; if (isa(producer.getOperation()) && isa(consumer.getOperation())) { @@ -221,8 +222,8 @@ fuseTensorOpsImpl(LinalgOp producer, LinalgOp consumer, unsigned consumerIdx, rewriter .create(consumer.getLoc(), consumer->getResultTypes(), /*inputs=*/fusedOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, + // TODO: handle outputs. + consumer.getOutputs(), rewriter.getArrayAttr(fusedIndexMaps), consumer.iterator_types(), /*doc=*/nullptr, @@ -230,18 +231,18 @@ fuseTensorOpsImpl(LinalgOp producer, LinalgOp consumer, unsigned consumerIdx, /*sparse=*/nullptr) .getOperation(); } else { - fusedOp = rewriter - .create( - consumer.getLoc(), consumer->getResultTypes(), - /*inputs=*/fusedOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, - rewriter.getArrayAttr(fusedIndexMaps), - consumer.iterator_types(), - /*doc=*/nullptr, - /*library_call=*/nullptr, - /*sparse=*/nullptr) - .getOperation(); + fusedOp = + rewriter + .create( + consumer.getLoc(), consumer->getResultTypes(), + /*inputs=*/fusedOperands, + // TODO: handle outputs. + consumer.getOutputs(), rewriter.getArrayAttr(fusedIndexMaps), + consumer.iterator_types(), + /*doc=*/nullptr, + /*library_call=*/nullptr, + /*sparse=*/nullptr) + .getOperation(); } // Construct an AffineMap from consumer loops to producer loops. @@ -430,6 +431,42 @@ static bool isFusableWithReshapeByDimExpansion(LinalgOp linalgOp, }); } +// Get the output tensor to use for the expanded operation. Creates an +// `linalg.init_tensor` operation to materialize the tensor that carries the +// shape information. 
+static Value getOutputValueForExpansion( + OpBuilder &builder, Location loc, AffineMap outputIndexingMap, Value result, + ArrayRef> origDimToExpandedShapeMap) { + SmallVector dynamicDims; + SmallVector staticDims; + ShapedType resultType = result.getType().cast(); + ArrayRef origShape = resultType.getShape(); + for (AffineExpr expr : outputIndexingMap.getResults()) { + unsigned origDimPos = expr.cast().getPosition(); + ArrayRef expandedShape(origDimToExpandedShapeMap[origDimPos]); + bool foundDynamic = false; + int64_t linearizedShape = 1; + for (int64_t extent : expandedShape) { + if (ShapedType::isDynamic(extent)) { + assert(!foundDynamic && + "Expanded dimensions of reshape can have only one dynamic dim"); + staticDims.push_back(ShapedType::kDynamicSize); + foundDynamic = true; + continue; + } + staticDims.push_back(extent); + linearizedShape *= extent; + } + if (ShapedType::isDynamic(origShape[origDimPos])) { + Value origDim = builder.create(loc, result, origDimPos); + dynamicDims.push_back(builder.create( + loc, origDim, builder.create(loc, linearizedShape))); + } + } + return builder.create(loc, dynamicDims, staticDims, + resultType.getElementType()); +} + /// Implements the fusion of a tensor_reshape op and a generic/indexed_generic /// op as explained in `isFusableWithReshapeByExpansion`. Assumes that those /// conditions have been satisfied. @@ -548,7 +585,7 @@ fuseWithReshapeByExpansion(LinalgOp linalgOp, TensorReshapeOp reshapeOp, expandedOpOperands.push_back(reshapeOp.src()); continue; } - AffineMap indexingMap = linalgOp.getIndexingMap(operand.index()); + AffineMap indexingMap = linalgOp.getInputIndexingMap(operand.index()); SmallVector reassociation; SmallVector expandedOperandShape; getReshapeInfo(indexingMap, reassociation, expandedOperandShape); @@ -563,17 +600,17 @@ fuseWithReshapeByExpansion(LinalgOp linalgOp, TensorReshapeOp reshapeOp, expandedOpOperands.push_back(operand.value()); } } - SmallVector resultTypes; + + Location loc = linalgOp.getLoc(); + SmallVector outputs; SmallVector, 1> resultReassociation; - for (auto result : llvm::enumerate(linalgOp->getResults())) { - AffineMap indexingMap = - linalgOp.getIndexingMap(linalgOp.getNumInputs() + result.index()); + for (auto result : llvm::enumerate(linalgOp.getOutputs())) { + AffineMap indexingMap = linalgOp.getOutputIndexingMap(result.index()); SmallVector reassociation; SmallVector expandedResultShape; getReshapeInfo(indexingMap, reassociation, expandedResultShape); - resultTypes.push_back(RankedTensorType::get( - expandedResultShape, - result.value().getType().cast().getElementType())); + outputs.push_back(getOutputValueForExpansion( + rewriter, loc, indexingMap, result.value(), expandedDimsShape)); resultReassociation.emplace_back(std::move(reassociation)); } @@ -581,11 +618,11 @@ fuseWithReshapeByExpansion(LinalgOp linalgOp, TensorReshapeOp reshapeOp, SmallVector iteratorTypes(remapping.back(), getParallelIteratorTypeName()); + TypeRange resultTypes = ValueRange(outputs).getTypes(); LinalgOp fusedOp = createLinalgOpOfSameType( linalgOp, rewriter, linalgOp.getLoc(), resultTypes, - /*inputs=*/expandedOpOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, expandedOpIndexingMaps, iteratorTypes); + /*inputs=*/expandedOpOperands, outputs, expandedOpIndexingMaps, + iteratorTypes); Region &fusedRegion = fusedOp->getRegion(0); Region &originalRegion = linalgOp->getRegion(0); @@ -656,6 +693,47 @@ fuseWithReshapeByExpansion(LinalgOp linalgOp, TensorReshapeOp reshapeOp, return resultVals; } +static 
Value +getOutputValueForLinearization(OpBuilder &builder, Location loc, + Value origOutput, + ArrayRef reassociationMaps) { + SmallVector dynamicDims; + SmallVector staticDims; + auto shapedType = origOutput.getType().cast(); + ArrayRef origShape = shapedType.getShape(); + for (auto map : reassociationMaps) { + Optional dynamicDim; + int64_t staticLinearizedShape = 1; + for (AffineDimExpr expr : + llvm::map_range(map.getResults(), [](AffineExpr e) { + return e.cast(); + })) { + unsigned pos = expr.getPosition(); + if (ShapedType::isDynamic(origShape[pos])) { + Value dim = builder.create(loc, origOutput, pos); + if (dynamicDim) { + dynamicDim = builder.create(loc, dynamicDim.getValue(), dim); + } else { + dynamicDim = dim; + } + } else { + staticLinearizedShape *= origShape[pos]; + } + } + if (dynamicDim) { + dynamicDim = builder.create( + loc, dynamicDim.getValue(), + builder.create(loc, staticLinearizedShape)); + dynamicDims.push_back(dynamicDim.getValue()); + staticDims.push_back(ShapedType::kDynamicSize); + } else { + staticDims.push_back(staticLinearizedShape); + } + } + return builder.create(loc, dynamicDims, staticDims, + shapedType.getElementType()); +} + namespace { /// Pattern to fold tensor_reshape op with its consumer by using the source of @@ -704,6 +782,8 @@ struct FoldProducerReshapeOpByLinearization // Compute the fused operands list, SmallVector fusedOperands(linalgOp.getInputs()); fusedOperands[operand.index()] = reshapeOp.src(); + fusedOperands.append(linalgOp.getOutputs().begin(), + linalgOp.getOutputs().end()); // Compute indexing_maps for the fused operation. The indexing_maps for // the operands of the consumers that arent fused are the same. @@ -736,7 +816,7 @@ struct FoldProducerReshapeOpByLinearization rewriter.eraseOp(reshapeOp); return success(); } - return op.emitRemark("no fusion candidates found"); + return failure(); } }; @@ -816,12 +896,15 @@ struct FoldConsumerReshapeOpByLinearization if (!inversePermutation(concatAffineMaps(fusedIndexMaps))) return reshapeOp.emitRemark("fused op loop bound computation failed"); + Location loc = producer.getLoc(); + Value output = + getOutputValueForLinearization(rewriter, loc, producer.getOutputs()[0], + reshapeOp.getReassociationMaps()); LinalgOp fusedOp = createLinalgOpOfSameType( - producer, rewriter, rewriter.getUnknownLoc(), reshapeOp.getResultType(), + producer, rewriter, loc, reshapeOp.getResultType(), /*inputs=*/producer.getInputs(), - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, // no init tensors for now. - rewriter.getAffineMapArrayAttr(fusedIndexMaps), + // TODO: handle outputs. + /*outputs=*/output, rewriter.getAffineMapArrayAttr(fusedIndexMaps), producer.iterator_types(), /*doc=*/nullptr, /*library_call=*/nullptr, @@ -902,8 +985,7 @@ struct FoldSplatConstants : public OpRewritePattern { linalgOp, rewriter, rewriter.getUnknownLoc(), linalgOp->getResultTypes(), /*inputs=*/fusedOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, // no init tensors for now. 
+ /*outputs=*/linalgOp.getOutputs(), rewriter.getAffineMapArrayAttr(fusedIndexMaps), linalgOp.iterator_types(), /*doc=*/nullptr, @@ -915,7 +997,7 @@ struct FoldSplatConstants : public OpRewritePattern { Region &linalgOpRegion = linalgOp->getRegion(0); Block &entryBlock = *linalgOpRegion.begin(); unsigned argIndex = entryBlock.getNumArguments() - - linalgOp.getNumInputs() + operand.index(); + linalgOp.getNumShapedOperands() + operand.index(); BlockAndValueMapping mapping; mapping.map(entryBlock.getArgument(argIndex), scalarConstant); Region &fusedRegion = fusedOp->getRegion(0); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp index 3496a77969881..454bbbe3578a4 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp @@ -45,8 +45,8 @@ static linalg::GenericOp createGenericOpFromNamedOp(linalg::LinalgOp namedOp, SmallVector types(resultTypes.begin(), resultTypes.end()); return builder.create( - namedOp.getLoc(), types, namedOp.getInputs(), namedOp.getOutputBuffers(), - namedOp.getInitTensors(), indexingMaps, iterators, + namedOp.getLoc(), types, namedOp.getInputs(), namedOp.getOutputs(), + indexingMaps, iterators, [®ionBuilder](OpBuilder &bodyBuilder, Location loc, ValueRange) { edsc::ScopedContext scope(bodyBuilder, loc); regionBuilder(*bodyBuilder.getBlock()); @@ -153,8 +153,8 @@ linalg::GenericOp GeneralizeConvOp::createGenericOp(linalg::ConvOp convOp, llvm::to_vector<4>(convOp.iterator_types().getAsValueRange()); return builder.create( convOp.getLoc(), /*resultTensorTypes=*/ArrayRef(), - convOp.getInputBuffers(), convOp.getOutputBuffers(), - /*initTensors=*/ValueRange(), indexingMaps, iterators, + convOp.getInputBuffers(), convOp.getOutputBuffers(), indexingMaps, + iterators, [](OpBuilder &bodyBuilder, Location bodyLoc, ValueRange bodyArgs) { Value mul = bodyBuilder.create(bodyLoc, bodyArgs[0], bodyArgs[1]); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp index a7f0660281b5c..cac0ae0d081c4 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp @@ -64,7 +64,7 @@ LinalgOp mlir::linalg::interchange(LinalgOp op, assert(permutationMap && "expected permutation to be invertible"); SmallVector newIndexingMaps; auto indexingMaps = op.indexing_maps().getValue(); - for (unsigned i = 0, e = op.getNumInputsAndOutputs(); i != e; ++i) { + for (unsigned i = 0, e = op.getNumShapedOperands(); i != e; ++i) { AffineMap m = indexingMaps[i].cast().getValue(); if (!permutationMap.isEmpty()) m = m.compose(permutationMap); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 073673bc33f86..329cc88bd2aed 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -172,7 +172,8 @@ LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( LinalgOp linalgOp, const LinalgPromotionOptions &options) : subViews(), dynamicBuffers(options.dynamicBuffers), alignment(options.alignment) { - unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); + assert(linalgOp.hasBufferSemantics() && "revisit usage of shaped operand"); + unsigned nBuffers = linalgOp.getNumShapedOperands(); auto vUseFullTileBuffers = options.useFullTileBuffers.getValueOr(llvm::SmallBitVector()); vUseFullTileBuffers.resize(nBuffers, 
options.useFullTileBuffersDefault); @@ -180,7 +181,7 @@ LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( for (unsigned idx = 0; idx != nBuffers; ++idx) { if (options.operandsToPromote && !options.operandsToPromote->count(idx)) continue; - auto *op = linalgOp.getBuffer(idx).getDefiningOp(); + auto *op = linalgOp.getShapedOperand(idx).getDefiningOp(); if (auto sv = dyn_cast_or_null(op)) { subViews[idx] = sv; useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; @@ -326,10 +327,10 @@ promoteSubViews(OpBuilder &b, LinalgOp op, // operands are not views. This is to support cases such as FillOp taking // extra scalars etc. Keep a reference to output buffers; SmallVector opViews; - opViews.reserve(op.getNumInputsAndOutputs()); + opViews.reserve(op.getNumShapedOperands()); SmallVector, 8> writebackViews; writebackViews.reserve(promotedBuffersAndViews->size()); - for (auto view : llvm::enumerate(op.getInputsAndOutputBuffers())) { + for (auto view : llvm::enumerate(op.getShapedOperands())) { if (options.subViews.count(view.index()) != 0) { if (options.useFullTileBuffers[view.value()]) opViews.push_back( @@ -371,7 +372,7 @@ mlir::linalg::promoteSubviewsPrecondition(Operation *op, if (!linOp || !linOp.hasBufferSemantics()) return failure(); // Check that at least one of the requested operands is indeed a subview. - for (auto en : llvm::enumerate(linOp.getInputsAndOutputBuffers())) { + for (auto en : llvm::enumerate(linOp.getShapedOperands())) { auto sv = isa_and_nonnull(en.value().getDefiningOp()); if (sv) { if (!options.operandsToPromote.hasValue() || diff --git a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp index fed2eedd41a45..eb940d0f769bb 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp @@ -334,7 +334,7 @@ struct CodeGen { /// Helper method to inspect sparse annotations in the linalg operation. /// Fills the per-dimension sparsity information for all tensors. static void findSparseAnnotations(Merger &merger, linalg::GenericOp op) { - unsigned numTensors = op.getNumInputsAndOutputs(); + unsigned numTensors = op.getNumShapedOperands(); ArrayAttr sparseAttr = op.sparseAttr(); for (unsigned t = 0; t < numTensors; t++) { auto map = op.getIndexingMap(t); @@ -467,7 +467,7 @@ static unsigned buildLattices(Merger &merger, linalg::GenericOp op, // is set to a synthetic tensor with undefined indices only. unsigned s = merger.addSet(); unsigned t = kind == Kind::kTensor ? merger.exp(exp).e0 - : op.getNumInputsAndOutputs(); + : op.getNumShapedOperands() - 1; merger.set(s).push_back(merger.addLat(t, idx, exp)); return s; } @@ -504,7 +504,7 @@ static Type genIntType(PatternRewriter &rewriter, linalg::SparseIntType tp) { static void genBuffers(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op) { Location loc = op.getLoc(); - unsigned numTensors = op.getNumInputsAndOutputs(); + unsigned numTensors = op.getNumShapedOperands(); unsigned numInputs = op.getNumInputs(); assert(numTensors == numInputs + 1); @@ -544,7 +544,7 @@ static void genBuffers(Merger &merger, CodeGen &codegen, up = codegen.sizes[i]; assert(up); // TODO: what else? } else { - Value arg = t < numInputs ? op.getInput(t) : op.getInitTensor(0); + Value arg = t < numInputs ? 
op.getInput(t) : op.getInitTensors()[0]; up = rewriter.create(loc, arg, d); } args.push_back(up); @@ -597,7 +597,7 @@ static void genTensorStore(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op, unsigned tensor, Value rhs) { // Test if this is a scalarized reduction. - unsigned lhs = op.getNumInputsAndOutputs() - 1; + unsigned lhs = op.getNumShapedOperands() - 1; if (lhs == tensor && codegen.redVal) { codegen.redVal = rhs; return; @@ -670,7 +670,7 @@ static void genInvariants(Merger &merger, CodeGen &codegen, atLevel = true; } // All exhausted at this level (atLevel denotes exactly at this level). - unsigned lhs = op.getNumInputsAndOutputs() - 1; + unsigned lhs = op.getNumShapedOperands() - 1; if (lhs == tensor) { codegen.redExp = hoist ? exp : -1u; } else if (atLevel) { @@ -995,7 +995,7 @@ static void genStmt(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, unsigned exp, unsigned at) { // At each leaf, assign remaining tensor (sub)expression to output tensor. if (at == topSort.size()) { - unsigned lhs = op.getNumInputsAndOutputs() - 1; + unsigned lhs = op.getNumShapedOperands() - 1; Value rhs = genExp(merger, codegen, rewriter, op, exp); genTensorStore(merger, codegen, rewriter, op, lhs, rhs); return; @@ -1073,7 +1073,7 @@ static void genStmt(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, Value red = codegen.redVal; if (red) { codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain - unsigned lhs = op.getNumInputsAndOutputs() - 1; + unsigned lhs = op.getNumShapedOperands() - 1; genTensorStore(merger, codegen, rewriter, op, lhs, red); } codegen.loops[idx] = Value(); @@ -1095,7 +1095,7 @@ struct GenericOpSparsifier : public OpRewritePattern { if (!op.hasSparseSemantics()) return failure(); assert(op.getNumOutputs() == 1); - unsigned numTensors = op.getNumInputsAndOutputs(); + unsigned numTensors = op.getNumShapedOperands(); unsigned numLoops = op.iterator_types().getValue().size(); Merger merger(numTensors, numLoops); findSparseAnnotations(merger, op); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 423d687c1eb8c..f323d2e50435e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -375,9 +375,9 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes, // 2. Create the tiled loops. LinalgOp res = op; SmallVector ivs, tensorResults; - auto initTensors = op.getInitTensors(); + auto outputTensors = op.getOutputTensors(); GenerateLoopNest::doit( - loopRanges, /*iterArgInitValues*/ initTensors, iteratorTypes, + loopRanges, /*iterArgInitValues*/ outputTensors, iteratorTypes, [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector { auto &b = ScopedContext::getBuilderRef(); auto loc = ScopedContext::getLocation(); @@ -392,14 +392,16 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes, else interchangedIvs.assign(ivs.begin(), ivs.end()); - assert(op.getNumInitTensors() == iterArgs.size() && - "num init tensors must match number of loop iter arguments"); - // This uses knowledge about position of the init tensor in the list - // of operands. 
- auto operands = llvm::to_vector<4>(op.getShapedOperands()); - std::copy(iterArgs.begin(), iterArgs.end(), - operands.begin() + op.getNumInputsAndOutputBuffers()); + assert(op.getNumOutputTensors() == iterArgs.size() && + "num output tensors must match number of loop iter arguments"); + auto operands = llvm::to_vector<4>(op.getInputs()); + SmallVector outputBuffers = op.getOutputBuffers(); + // TODO: thanks to simplifying assumption we do not need to worry about + // order of output buffers and tensors: there is only ever one kind. + assert(outputBuffers.empty() || iterArgs.empty()); + operands.append(outputBuffers.begin(), outputBuffers.end()); + operands.append(iterArgs.begin(), iterArgs.end()); SmallVector tiledOperands = makeTiledShapes(b, loc, op, operands, shapeSizesToLoopsMap, interchangedIvs, tileSizes, allShapeSizes); @@ -407,41 +409,31 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes, tiledOperands.append(nonShapedOperands.begin(), nonShapedOperands.end()); - // If LinalgOp has results, they must all be tied to init tensors. - // We enforce this to ensure all tiled ops have been rewritten in - // "init tensor" form. This ensures tiling has anchor values into which - // to subtensor / subtensor_insert. Otherwise tiling would need to - // allocate which is not acceptable. - // This would not be the case with a special terminator op that - // generates the whole tensor (instead of inserting a subtensor). But - // the generator-based abstraction has other issues. - assert(op.getNumInitTensors() == op->getNumResults() && - "expected same number of init tensors as number of results"); - - // Handle init tensor operands. - // This uses knowledge about position of the init tensor in the list - // of operands. - // TODO: InterfaceAdaptor ? + // TODO: use an interface/adaptor to avoid leaking position in + // `tiledOperands`. SmallVector resultTensorTypes; - for (auto idx : llvm::seq(0, op.getNumInitTensors())) + for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) resultTensorTypes.push_back( - tiledOperands[op.getNumInputsAndOutputBuffers() + idx].getType()); + tiledOperands[opOperand->getOperandNumber()].getType()); res = op.clone(b, loc, resultTensorTypes, tiledOperands); - // Insert a subtensor_insert for each init subtensor. - for (unsigned idx = 0, e = op.getNumInitTensors(); idx != e; ++idx) { - Value initTensor = - tiledOperands[op.getNumInputsAndOutputBuffers() + idx]; - if (auto subtensor = initTensor.getDefiningOp()) { + // Insert a subtensor_insert for each output tensor. + unsigned resultIdx = 0; + for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) { + // TODO: use an interface/adaptor to avoid leaking position in + // `tiledOperands`. 
+ Value outputTensor = tiledOperands[opOperand->getOperandNumber()]; + if (auto subtensor = outputTensor.getDefiningOp()) { tensorResults.push_back(b.create( - loc, subtensor.source().getType(), res->getResult(idx), + loc, subtensor.source().getType(), res->getResult(resultIdx), subtensor.source(), subtensor.offsets(), subtensor.sizes(), subtensor.strides(), subtensor.static_offsets(), subtensor.static_sizes(), subtensor.static_strides())); } else { - tensorResults.push_back(res->getResult(idx)); + tensorResults.push_back(res->getResult(resultIdx)); } + ++resultIdx; } return scf::ValueVector(tensorResults.begin(), tensorResults.end()); }, diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 804ae6681f8cb..c5d811c41edb0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -125,17 +125,6 @@ LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewriteBase( if (failed(marker.checkAndNotify(rewriter, linalgOp))) return failure(); - // If LinalgOp has results, they must all be tied to init tensors. - // We enforce this to ensure all tiled ops have been rewritten in - // "init tensor" form. This ensures tiling has anchor values into which to - // subtensor / subtensor_insert. Otherwise tiling would need to allocate which - // is not acceptable. - // This would not be the case with a special terminator op that generates the - // whole tensor (instead of inserting a subtensor). But the generator-based - // abstraction has other issues. - if (linalgOp.getNumInitTensors() != linalgOp->getNumResults()) - return failure(); - Optional res = tileLinalgOp(rewriter, linalgOp, options); if (!res) @@ -174,10 +163,10 @@ LogicalResult mlir::linalg::LinalgBaseTileAndFusePattern::matchAndRewrite( producers.insert(linalgOp); for (auto dependence : dependenceGraph.getDependentOperations(linalgOp)) { if (!fusionOptions.indicesToFuse.count( - dependence.indexingOpView.operandIndex)) + dependence.indexingOpView->getOperandNumber())) continue; - if (isa(dependence.dependentOpView.op)) - producers.insert(dependence.dependentOpView.op); + if (isa(dependence.dependentOpView->getOwner())) + producers.insert(dependence.dependentOpView->getOwner()); } SmallVector fusionOps; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 7165ee775e9c6..23e452df91842 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -199,9 +199,8 @@ class GenericVectorizer { // block argument. auto scalarArg = scalarValue.cast(); assert(scalarArg.getOwner() == &generic.region().front()); - Value vector_arg = - generic.getInputsAndOutputBuffers()[scalarArg.getArgNumber()]; - Value vectorResult = transferReadVector(builder, vector_arg); + Value vectorArg = generic.getShapedOperand(scalarArg.getArgNumber()); + Value vectorResult = transferReadVector(builder, vectorArg); valueCache[scalarArg] = vectorResult; return vectorResult; } @@ -277,7 +276,7 @@ static void vectorizeElementwise(linalg::GenericOp op, OpBuilder &builder) { LogicalResult mlir::linalg::vectorizeLinalgOpPrecondition(Operation *op) { auto linalgOp = cast(op); // All types must be static shape to go to vector. 
- for (Value operand : linalgOp.getInputsAndOutputBuffers()) + for (Value operand : linalgOp.getShapedOperands()) if (!operand.getType().cast().hasStaticShape()) return failure(); for (Type outputTensorType : linalgOp.getOutputTensorTypes()) diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index f44bb6769e616..81bfbc6ecf525 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -104,12 +104,6 @@ SmallVector getStaticShape(LinalgOp linalgOp) { auto shape = v.getType().cast().getShape(); res.append(shape.begin(), shape.end()); } - if (linalgOp.getNumInitTensors()) - return res; - for (Value v : linalgOp.getOperation()->getResults()) { - auto shape = v.getType().cast().getShape(); - res.append(shape.begin(), shape.end()); - } return res; } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index c0af06314086c..30bf546807c43 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1477,12 +1477,12 @@ struct DimOfCastOp : public OpRewritePattern { return success(); } }; - } // end anonymous namespace. void DimOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert>(context); + results.insert, + DimOfCastOp>(context); } // --------------------------------------------------------------------------- diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir index 368568bdcc4a4..08d715f90b5e1 100644 --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -linalg-bufferize -split-input-file %s | FileCheck %s +// RUN: mlir-opt -linalg-bufferize -canonicalize -cse -split-input-file %s | FileCheck %s #map0 = affine_map<(d0) -> (d0)> @@ -26,8 +26,9 @@ func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> { %0 = linalg.generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel"] - } ins(%arg0 : tensor<4xf32>) { - ^bb0(%gen_arg1: f32): + } ins(%arg0 : tensor<4xf32>) + outs(%arg0 : tensor<4xf32>) { + ^bb0(%gen_arg1: f32, %out: f32): %tmp1 = exp %gen_arg1 : f32 linalg.yield %tmp1 : f32 } -> tensor<4xf32> @@ -35,6 +36,35 @@ func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> { } +// ----- + +#map0 = affine_map<(d0) -> (d0)> + +// Same as above but with linalg.init_tensor op. 
+ +// CHECK: #map = affine_map<(d0) -> (d0)> +// CHECK-LABEL: func @init_tensor( +// CHECK-SAME: %[[IN:.*]]: tensor, %[[SIZE:.*]]: index) +// CHECK: %[[OUT_BUF:.*]] = alloc(%[[SIZE]]) : memref +// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[IN]] : memref +// CHECK: linalg.generic +// CHECK-SAME: ins(%[[MEMREF]] : memref) +// CHECK-SAME: outs(%[[OUT_BUF]] : memref) { +func @init_tensor(%in : tensor, %size: index) -> tensor { + %init = linalg.init_tensor [%size] : tensor + %0 = linalg.generic { + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"] + } ins(%in : tensor) + outs(%init : tensor) { + ^bb0(%gen_arg1: f32, %out: f32): + %tmp1 = exp %gen_arg1 : f32 + linalg.yield %tmp1 : f32 + } -> tensor + return %0 : tensor +} + + // ----- #map0 = affine_map<(d0) -> (d0)> @@ -50,8 +80,9 @@ func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { %0, %1 = linalg.generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"] - } ins(%arg0 : tensor<4xf32>) { - ^bb0(%gen_arg1: f32): + } ins(%arg0 : tensor<4xf32>) + outs (%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) { + ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32): %tmp1 = exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : f32, f32 } -> tensor<4xf32>, tensor<4xf32> @@ -74,8 +105,9 @@ func @multiple_results_indexed(%arg0: tensor<4xi32>) %0, %1 = linalg.indexed_generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"] - } ins(%arg0 : tensor<4xi32>) { - ^bb0(%i: index, %gen_arg1: i32): + } ins(%arg0 : tensor<4xi32>) + outs (%arg0, %arg0 : tensor<4xi32>, tensor<4xi32>) { + ^bb0(%i: index, %gen_arg1: i32, %out1: i32, %out2: i32): %i_i32 = index_cast %i : index to i32 %tmp1 = addi %gen_arg1, %i_i32 : i32 linalg.yield %tmp1, %tmp1 : i32, i32 @@ -86,32 +118,30 @@ func @multiple_results_indexed(%arg0: tensor<4xi32>) // ----- #map_2d = affine_map<(d0, d1) -> (d0, d1)> -#map_2d_inv = affine_map<(d0, d1) -> (d1, d0)> // Check that the allocs properly consider the different shapes of the output // operands. The permuted indexing maps translate to different output shapes. 
-// CHECK: #map0 = affine_map<(d0, d1) -> (d0, d1)> -// CHECK: #map1 = affine_map<(d0, d1) -> (d1, d0)> // CHECK-LABEL: func @dynamic_results( // CHECK-SAME: %[[ARG:.*]]: tensor -// CHECK: %[[MEMREF_ARG:.*]] = tensor_to_memref %[[ARG]] : memref // CHECK: %[[C0:.*]] = constant 0 : index -// CHECK: %[[DIM0:.*]] = dim %[[ARG]], %[[C0]] : tensor // CHECK: %[[C1:.*]] = constant 1 : index +// CHECK: %[[MEMREF_ARG:.*]] = tensor_to_memref %[[ARG]] : memref +// CHECK: %[[DIM0:.*]] = dim %[[ARG]], %[[C0]] : tensor // CHECK: %[[DIM1:.*]] = dim %[[ARG]], %[[C1]] : tensor // CHECK: %[[RESULT0:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref -// CHECK: %[[RESULT1:.*]] = alloc(%[[DIM1]], %[[DIM0]]) : memref -// CHECK: linalg.generic {indexing_maps = [#map0, #map0, #map1] +// CHECK: %[[RESULT1:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref +// CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF_ARG]] : memref) // CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref, memref) func @dynamic_results(%arg0: tensor) -> (tensor, tensor) { %0, %1 = linalg.generic { - indexing_maps = [#map_2d, #map_2d, #map_2d_inv], + indexing_maps = [#map_2d, #map_2d, #map_2d], iterator_types = ["parallel", "parallel"] - } ins(%arg0 : tensor) { - ^bb0(%gen_arg1: f32): + } ins(%arg0 : tensor) + outs (%arg0, %arg0 : tensor, tensor) { + ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32): %tmp1 = exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : f32, f32 } -> tensor, tensor @@ -147,10 +177,9 @@ func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>, %0 = linalg.generic #trait ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) - init(%arg1 : tensor<3x2xf32>) { + outs(%arg1 : tensor<3x2xf32>) { ^bb(%v0: vector<3x4xi4>, %v1: f32) : - %f0 = constant 0.0 : f32 - linalg.yield %f0 : f32 + linalg.yield %v1 : f32 } -> tensor<3x2xf32> return %0 : tensor<3x2xf32> @@ -204,16 +233,16 @@ func @bufferize_subtensor_insert(%t : tensor, %st0 : tensor<2x3xf32>, % (tensor, tensor) { %c0 = constant 0 : index %c1 = constant 1 : index - // CHECK: %[[IDX:.*]] = call @make_index() : () -> index + // CHECK-NEXT: %[[C0:.*]] = constant 0 : index + // CHECK-NEXT: %[[C1:.*]] = constant 1 : index %i0 = call @make_index() : () -> index + // CHECK: %[[IDX:.*]] = call @make_index() : () -> index - // CHECK-DAG: %[[M0:.*]] = tensor_to_memref %[[T]] : memref - // CHECK-DAG: %[[SM0:.*]] = tensor_to_memref %[[ST0]] : memref<2x3xf32> - // CHECK-NEXT: %[[C0:.*]] = constant 0 : index - // CHECK-NEXT: %[[DIM0:.*]] = dim %[[M0]], %[[C0]] : memref - // CHECK-NEXT: %[[C1:.*]] = constant 1 : index - // CHECK-NEXT: %[[DIM1:.*]] = dim %[[M0]], %[[C1]] : memref + // CHECK-DAG: %[[M0:.*]] = tensor_to_memref %[[T]] : memref + // CHECK-DAG: %[[SM0:.*]] = tensor_to_memref %[[ST0]] : memref<2x3xf32> + // CHECK-NEXT: %[[DIM0:.*]] = dim %[[T]], %[[C0]] : tensor + // CHECK-NEXT: %[[DIM1:.*]] = dim %[[T]], %[[C1]] : tensor // CHECK-NEXT: %[[M0_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref // CHECK-NEXT: linalg.copy(%[[M0]], %[[M0_COPY]]) : memref, memref // CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0_COPY]][0, 0] [2, 3] [1, 1] @@ -224,10 +253,6 @@ func @bufferize_subtensor_insert(%t : tensor, %st0 : tensor<2x3xf32>, % // CHECK-DAG: %[[M1:.*]] = tensor_to_memref %[[T]] : memref // CHECK-DAG: %[[SM1:.*]] = tensor_to_memref %[[ST1]] : memref<2x?xf32> - // CHECK-NEXT: %[[C0:.*]] = constant 0 : index - // CHECK-NEXT: %[[DIM0:.*]] = dim %[[M1]], %[[C0]] : memref - // CHECK-NEXT: %[[C1:.*]] = constant 1 : index - // CHECK-NEXT: %[[DIM1:.*]] = dim %[[M1]], %[[C1]] : memref // CHECK-NEXT: %[[M1_COPY:.*]] = 
alloc(%[[DIM0]], %[[DIM1]]) : memref // CHECK-NEXT: linalg.copy(%[[M1]], %[[M1_COPY]]) : memref, memref // CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1_COPY]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] @@ -239,3 +264,4 @@ func @bufferize_subtensor_insert(%t : tensor, %st0 : tensor<2x3xf32>, % // CHECK: return %[[RT0]], %[[RT1]] return %t0, %t1: tensor, tensor } + diff --git a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir index 8c08fb390b9ee..de894b9192fb0 100644 --- a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir @@ -8,10 +8,12 @@ // CHECK-LABEL: @basic func @basic(%arg0: tensor) -> tensor { // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]]] - // CHECK: ^bb0(%[[BBARG:.*]]: f32): + // CHECK: ^bb0(%[[BBARG:.*]]: f32, %{{.*}}: f32): // CHECK: addf %[[BBARG]], %[[BBARG]] - %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg0 : tensor, tensor) { - ^bb0(%arg1: f32, %arg2: f32): + %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} + ins(%arg0, %arg0 : tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = addf %arg1, %arg2 : f32 linalg.yield %1 : f32 } -> tensor @@ -31,8 +33,10 @@ func @basic(%arg0: tensor) -> tensor { // CHECK-LABEL: @distinct_affine_maps func @distinct_affine_maps(%arg0: tensor) -> tensor { // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]] - %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0 : tensor, tensor) { - ^bb0(%arg1: f32, %arg2: f32): + %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} + ins(%arg0, %arg0 : tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = addf %arg1, %arg2 : f32 linalg.yield %1 : f32 } -> tensor @@ -52,10 +56,12 @@ func @distinct_affine_maps(%arg0: tensor) -> tensor { // CHECK-LABEL: @mixed_redundant_non_redundant func @mixed_redundant_non_redundant(%arg0: tensor) -> tensor { // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]] - // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32): + // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32): // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]]) - %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0, %arg0 : tensor, tensor, tensor) { - ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): + %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} + ins(%arg0, %arg0, %arg0 : tensor, tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32 linalg.yield %1 : f32 } -> tensor @@ -72,10 +78,12 @@ func @mixed_redundant_non_redundant(%arg0: tensor) -> tensor { // CHECK-LABEL: @multiple_different_redundant_args func @multiple_different_redundant_args(%arg0: tensor, %arg1: tensor) -> tensor { // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]], #[[$MAP]]] - // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32): + // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32): // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], 
%[[BBARG1]]) - %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1, %arg0, %arg1 : tensor, tensor, tensor, tensor) { - ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32): + %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]} + ins(%arg0, %arg1, %arg0, %arg1 : tensor, tensor, tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32 linalg.yield %1 : f32 } -> tensor @@ -93,10 +101,12 @@ func @multiple_different_redundant_args(%arg0: tensor, %arg1: tensor) -> tensor { // CHECK: linalg.indexed_generic - // CHECK: ^bb0(%{{.*}}: index, %[[BBARG:.*]]: f32): + // CHECK: ^bb0(%{{.*}}: index, %[[BBARG:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32): // CHECK: addf %[[BBARG]], %[[BBARG]] - %0 = linalg.indexed_generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg0 : tensor, tensor) { - ^bb0(%index: index, %arg1: f32, %arg2: f32): + %0 = linalg.indexed_generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} + ins(%arg0, %arg0 : tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%index: index, %arg1: f32, %arg2: f32, %arg3: f32): %1 = addf %arg1, %arg2 : f32 linalg.yield %1 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 6c12070e07f18..f015d5fd64fd9 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -232,7 +232,6 @@ func @no_fold_memref_reshape(%arg0 : memref) -> memref // ----- #accesses = [ - affine_map<(i) -> (i)>, affine_map<(i) -> (i)> ] @@ -246,7 +245,7 @@ func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf linalg.copy(%arg0, %arg0): memref<0xf32>, memref<0xf32> // tensor<0xf32> cannot be dce'ed - %1 = linalg.generic #trait ins(%arg1 : tensor<0xf32>) { + %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) { ^bb(%0: f32) : linalg.yield %0 : f32 } -> tensor<0xf32> @@ -326,9 +325,9 @@ func @tensor.cast(%a : tensor<3x4xf32>, %b : tensor<4x?xf32>, %c : tensor<3x?xf3 %tc = tensor.cast %c : tensor<3x?xf32> to tensor // CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>) - // CHECK-SAME: init({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> + // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> %0 = linalg.matmul ins(%ta, %tb: tensor, tensor) - init(%tc: tensor) -> tensor + outs(%tc: tensor) -> tensor %1 = tensor.cast %0 : tensor to tensor<3x?xf32> @@ -344,7 +343,7 @@ func @tensor.cast(%a : tensor<3x4xf32>, %b : tensor<4x?xf32>, %c : tensor<3x?xf3 func @linalg_effects(%a : tensor, %b : memref, %c : tensor) { // CHECK-NOT: %{{.*}} = linalg.matmul %t = linalg.matmul ins(%a, %b : tensor, memref) - init(%c : tensor) -> tensor + outs(%c : tensor) -> tensor // CHECK-NOT: %{{.*}} = linalg.matmul linalg.matmul ins(%a, %c : tensor, tensor) diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir index 7ea78fef7addb..8dca137843bb0 100644 --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -1,14 +1,20 @@ // RUN: mlir-opt -convert-elementwise-to-linalg -split-input-file %s | FileCheck %s // In-depth checking of the linalg.generic op for a very trivial case. 
-// CHECK: #map = affine_map<() -> ()> -// CHECK-LABEL: func @addf_rank0 +// CHECK: #[[$MAP:.*]] = affine_map<() -> ()> +// CHECK-LABEL: func @addf_rank0 +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor +// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor func @addf_rank0(%arg0: tensor, %arg1: tensor) -> tensor { - // CHECK: %{{.*}} = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%{{.*}}, %{{.*}} : tensor, tensor) { - // CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32): - // CHECK: %[[YIELD:.*]] = addf %[[LHS]], %[[RHS]] : f32 - // CHECK: linalg.yield %[[YIELD]] : f32 - // CHECK: } -> tensor + // CHECK: %{{.*}} = linalg.generic + // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]], #[[$MAP]]] + // CHECK-SAME: iterator_types = [] + // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] + // CHECK-SAME: outs(%[[ARG0]] + // CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32, %{{.*}}: f32): + // CHECK: %[[YIELD:.*]] = addf %[[LHS]], %[[RHS]] : f32 + // CHECK: linalg.yield %[[YIELD]] : f32 + // CHECK: } -> tensor %0 = addf %arg0, %arg1 : tensor return %0 : tensor } @@ -16,10 +22,14 @@ func @addf_rank0(%arg0: tensor, %arg1: tensor) -> tensor { // ----- // Check indexing maps and iterator types for the rank > 0 case. -// CHECK: #map = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @addf_rank1 +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor +// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor func @addf_rank1(%arg0: tensor, %arg1: tensor) -> tensor { - // CHECK: linalg.generic{{.*}}indexing_maps = [#map, #map, #map], iterator_types = ["parallel"] + // CHECK: linalg.generic + // CHECK-SAME: iterator_types = ["parallel"] + // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] + // CHECK-SAME: outs(%[[ARG0]] %0 = addf %arg0, %arg1 : tensor return %0 : tensor } @@ -28,9 +38,12 @@ func @addf_rank1(%arg0: tensor, %arg1: tensor) -> tensor { // Check a unary op. // CHECK-LABEL: func @exp +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor func @exp(%arg0: tensor) -> tensor { // CHECK: linalg.generic - // CHECK: ^bb0(%[[SCALAR:.*]]: f32): + // CHECK-SAME: ins(%[[ARG0]] + // CHECK-SAME: outs(%[[ARG0]] + // CHECK: ^bb0(%[[SCALAR:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = exp %[[SCALAR]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 %0 = exp %arg0 : tensor @@ -41,9 +54,14 @@ func @exp(%arg0: tensor) -> tensor { // Check a case with varying operand types. // CHECK-LABEL: func @select +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor +// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor +// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor func @select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { // CHECK: linalg.generic - // CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32): + // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] + // CHECK-SAME: outs(%[[ARG1]] + // CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32, %{{.*}}: i32): // CHECK: select %[[PRED]], %[[TRUE_VAL]], %[[FALSE_VAL]] : i32 %0 = select %arg0, %arg1, %arg2 : tensor, tensor return %0 : tensor @@ -52,9 +70,41 @@ func @select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tenso // ----- // Spot-check an op that requires copying attributes properly to the created scalar op. +// Also checks proper init_tensor usage. 
// CHECK-LABEL: func @cmpf( +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor +// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor func @cmpf(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: %[[INIT:.*]] = linalg.init_tensor [] : tensor + // CHECK: linalg.generic + // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] + // CHECK-SAME: outs(%[[INIT]] + // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: cmpf "olt", %{{.*}}, %{{.*}} : f32 %0 = cmpf "olt", %arg0, %arg1 : tensor return %0 : tensor } + +// ----- + +// Check proper init_tensor usage in a mixed case. +// CHECK-LABEL: func @cmpf( +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<4x?x?x8x2x?xf32> +// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<4x?x?x8x2x?xf32> +func @cmpf(%arg0: tensor<4x?x?x8x2x?xf32>, %arg1: tensor<4x?x?x8x2x?xf32>) -> tensor<4x?x?x8x2x?xi1> { + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[D1:.*]] = dim %[[ARG0]], %[[C1]] : tensor<4x?x?x8x2x?xf32> + // CHECK: %[[C2:.*]] = constant 2 : index + // CHECK: %[[D2:.*]] = dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32> + // CHECK: %[[C5:.*]] = constant 5 : index + // CHECK: %[[D5:.*]] = dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32> + // CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[D1]], %[[D2]], 8, 2, %[[D5]]] : tensor<4x?x?x8x2x?xi1> + // CHECK: linalg.generic + // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] + // CHECK-SAME: outs(%[[INIT]] + // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): + // CHECK: cmpf "olt", %{{.*}}, %{{.*}} : f32 + %0 = cmpf "olt", %arg0, %arg1 : tensor<4x?x?x8x2x?xf32> + return %0 : tensor<4x?x?x8x2x?xi1> +} + diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index e04d03b4e4938..17b8bda967b1e 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fold-unit-extent-dims -split-input-file | FileCheck %s +// RUN: mlir-opt %s -split-input-file -linalg-fold-unit-extent-dims | FileCheck %s #accesses = [ affine_map<(i, j, k, l, m) -> (i, k, m)>, @@ -11,12 +11,12 @@ library_call = "some_external_func" } -func @drop_one_trip_loops(%arg0 : tensor) -> tensor -{ +func @drop_one_trip_loops(%arg0 : tensor, %shape: tensor) -> tensor { %0 = linalg.generic #trait - ins(%arg0 : tensor) { - ^bb0(%arg1 : f32) : - linalg.yield %arg1 : f32 + ins(%arg0 : tensor) + outs(%shape : tensor) { + ^bb0(%arg2 : f32, %arg3 : f32) : + linalg.yield %arg2 : f32 } -> tensor return %0 : tensor } @@ -48,12 +48,13 @@ func @drop_one_trip_loops(%arg0 : tensor) -> tensor } func @drop_one_trip_loops_indexed_generic - (%arg0 : tensor) -> tensor + (%arg0 : tensor, %shape: tensor) -> tensor { %0 = linalg.indexed_generic #trait - ins(%arg0 : tensor) { + ins(%arg0 : tensor) + outs(%shape: tensor) { ^bb0(%arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, - %arg5 : index, %arg6 : i32) : + %arg5 : index, %arg6 : i32, %arg7 : i32) : %1 = addi %arg1, %arg2 : index %2 = addi %1, %arg3 : index %3 = addi %2, %arg4 : index @@ -68,7 +69,7 @@ func @drop_one_trip_loops_indexed_generic // CHECK: linalg.indexed_generic // CHECK: ^{{.+}}( // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index, %[[ARG4:[a-zA-Z0-9]+]]: i32) +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index, %[[ARG4:[a-zA-Z0-9]+]]: i32, %{{.*}}: i32) // CHECK: %[[T3:.+]] = addi %[[ARG1]], %[[ARG2]] // CHECK: %[[T4:.+]] = addi %[[T3]], %[[ARG3]] // CHECK: %[[T5:.+]] = index_cast 
%[[T4]] : index to i32 @@ -88,8 +89,9 @@ func @drop_one_trip_loops_indexed_generic func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32> { %0 = linalg.generic #trait - ins(%arg0 : tensor<1x1xf32>) { - ^bb0(%arg1: f32) : + ins(%arg0 : tensor<1x1xf32>) + outs(%arg0 : tensor<1x1xf32>) { + ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> return %0 : tensor<1x1xf32> @@ -112,11 +114,11 @@ func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32> } func @drop_all_loops_indexed_generic - (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32> -{ + (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{ %0 = linalg.indexed_generic #trait - ins(%arg0 : tensor<1x1xi32>) { - ^bb0(%arg1 : index, %arg2 : index, %arg3: i32) : + ins(%arg0 : tensor<1x1xi32>) + outs(%arg0 : tensor<1x1xi32>) { + ^bb0(%arg1 : index, %arg2 : index, %arg3: i32, %arg4: i32) : %1 = addi %arg1, %arg2 : index %2 = index_cast %1 : index to i32 %3 = addi %2, %arg3 : i32 @@ -127,7 +129,7 @@ func @drop_all_loops_indexed_generic // CHECK-LABEL: func @drop_all_loops_indexed_generic // CHECK: linalg.indexed_generic -// CHECK: ^{{.+}}(%[[ARG1:.+]]: i32) +// CHECK: ^{{.+}}(%[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32) // CHECK: linalg.yield %[[ARG1]] : i32 // ----- @@ -143,10 +145,11 @@ func @drop_all_loops_indexed_generic library_call = "some_external_fn" } -func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>) -> tensor<5xf32> { +func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait - ins(%arg0 : tensor<1x5xf32>) { - ^bb0(%arg2: f32): // no predecessors + ins(%arg0 : tensor<1x5xf32>) + outs(%shape : tensor<5xf32>) { + ^bb0(%arg2: f32, %arg3: f32): // no predecessors linalg.yield %arg2 : f32 } -> tensor<5xf32> return %0 : tensor<5xf32> @@ -172,16 +175,17 @@ func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>) -> tensor<5xf32> { library_call = "some_external_fn" } -func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) -> tensor<5x5xf32> +func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>, %shape : tensor<5x5xf32>) -> tensor<5x5xf32> { %0 = linalg.tensor_reshape %arg0 [affine_map<(d0, d1) -> (d0, d1)>] : tensor<5xf32> into tensor<1x5xf32> %1 = linalg.tensor_reshape %arg1 [affine_map<(d0, d1) -> (d0, d1)>] : tensor<5xf32> into tensor<5x1xf32> %2 = linalg.generic #trait - ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>) { - ^bb0(%arg2: f32, %arg3: f32): - %3 = addf %arg2, %arg3 : f32 + ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>) + outs(%shape : tensor<5x5xf32>) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): + %3 = addf %arg3, %arg4 : f32 linalg.yield %3 : f32 } -> tensor<5x5xf32> return %2 : tensor<5x5xf32> @@ -209,12 +213,13 @@ func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) -> tensor<5x5 library_call = "some_external_fn" } -func @broadcast_scalar(%arg0 : tensor<1x1xf32>) -> tensor +func @broadcast_scalar(%arg0 : tensor<1x1xf32>, %shape : tensor) -> tensor { %0 = linalg.generic #trait - ins(%arg0 : tensor<1x1xf32>) { - ^bb0(%arg1 : f32): - linalg.yield %arg1 : f32 + ins(%arg0 : tensor<1x1xf32>) + outs(%shape : tensor) { + ^bb0(%arg2 : f32, %arg3 : f32): + linalg.yield %arg2 : f32 } -> tensor return %0 : tensor } diff --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir index 6d75c480b5c6a..d0c526e441b69 100644 --- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir +++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir @@ -1,4 
+1,4 @@ -// RUN: mlir-opt %s -linalg-fold-unit-extent-dims="fold-one-trip-loops-only" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -split-input-file -linalg-fold-unit-extent-dims="fold-one-trip-loops-only" | FileCheck %s #accesses = [ affine_map<(i, j, k, l, m) -> (i, k, m)>, @@ -11,11 +11,12 @@ library_call = "some_external_func" } -func @drop_one_trip_loops(%arg0 : tensor) -> tensor +func @drop_one_trip_loops(%arg0 : tensor, %shape: tensor) -> tensor { %0 = linalg.generic #trait - ins(%arg0 : tensor) { - ^bb0(%arg1 : f32) : + ins(%arg0 : tensor) + outs(%shape : tensor) { + ^bb0(%arg1 : f32, %arg2 : f32) : linalg.yield %arg1 : f32 } -> tensor return %0 : tensor @@ -40,8 +41,9 @@ func @drop_one_trip_loops(%arg0 : tensor) -> tensor func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32> { %0 = linalg.generic #trait - ins(%arg0 : tensor<1x1xf32>) { - ^bb0(%arg1: f32) : + ins(%arg0 : tensor<1x1xf32>) + outs(%arg0 : tensor<1x1xf32>) { + ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> return %0 : tensor<1x1xf32> @@ -91,10 +93,11 @@ func @drop_all_loops(%arg0 : memref<1x1xf32>, %arg1 : memref<1x1xf32>) library_call = "some_external_fn" } -func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>) -> tensor<5xf32> { +func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait - ins(%arg0 : tensor<1x5xf32>) { - ^bb0(%arg2: f32): // no predecessors + ins(%arg0 : tensor<1x5xf32>) + outs(%shape : tensor<5xf32>) { + ^bb0(%arg2: f32, %arg3: f32): // no predecessors linalg.yield %arg2 : f32 } -> tensor<5xf32> return %0 : tensor<5xf32> diff --git a/mlir/test/Dialect/Linalg/fusion-tensor.mlir b/mlir/test/Dialect/Linalg/fusion-tensor.mlir index ff0394f18249c..df7e59d59dde6 100644 --- a/mlir/test/Dialect/Linalg/fusion-tensor.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor.mlir @@ -6,29 +6,36 @@ // CHECK-LABEL: @add_mul_fusion func @add_mul_fusion(%arg0: tensor, %arg1 : tensor, %arg2 : tensor) -> tensor { - %0 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors - %1 = addf %arg3, %arg4 : f32 - linalg.yield %1 : f32 + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = dim %arg0, %c0 : tensor + %1 = dim %arg0, %c1 : tensor + %2 = linalg.init_tensor [%0, %1] : tensor + %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} + ins(%arg0, %arg1 : tensor, tensor) + outs(%2 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + %4 = addf %arg3, %arg4 : f32 + linalg.yield %4 : f32 } -> tensor // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} - %2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%0, %arg2 : tensor, tensor) { + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} + ins(%3, %arg2 : tensor, tensor) + outs(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG2:%[a-zA-Z0-9_]*]] - ^bb0(%arg5: f32, %arg6: f32): // no predecessors + ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors // CHECK: [[T1:%[a-zA-Z0-9_]*]] = addf [[ARG0]], [[ARG1]] // CHECK-NOT: linalg.yield // CHECK: mulf [[T1]], [[ARG2]] // CHECK: 
linalg.yield - %3 = mulf %arg5, %arg6 : f32 - linalg.yield %3 : f32 + %5 = mulf %arg5, %arg6 : f32 + linalg.yield %5 : f32 } -> tensor - return %2 : tensor + return %4 : tensor } // ----- @@ -41,21 +48,28 @@ func @add_mul_fusion(%arg0: tensor, %arg1 : tensor, %arg2 : te // CHECK-LABEL: @transpose_add_mul_fusion func @transpose_add_mul_fusion(%arg0: tensor, %arg1 : tensor, %arg2 : tensor) -> tensor { - %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors - %1 = addf %arg3, %arg4 : f32 - linalg.yield %1 : f32 + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = dim %arg0, %c0 : tensor + %1 = dim %arg0, %c1 : tensor + %2 = linalg.init_tensor [%0, %1] : tensor + %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} + ins(%arg0, %arg1 : tensor, tensor) + outs(%2 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + %4 = addf %arg3, %arg4 : f32 + linalg.yield %4 : f32 } -> tensor // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP0]], [[$MAP0]]{{\]}} - %2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%0, %arg2 : tensor, tensor) { - ^bb0(%arg5: f32, %arg6: f32): // no predecessors - %3 = mulf %arg5, %arg6 : f32 - linalg.yield %3 : f32 + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} + ins(%3, %arg2 : tensor, tensor) + outs(%2 : tensor) { + ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors + %5 = mulf %arg5, %arg6 : f32 + linalg.yield %5 : f32 } -> tensor - return %2 : tensor + return %4 : tensor } // ----- @@ -68,21 +82,28 @@ func @transpose_add_mul_fusion(%arg0: tensor, %arg1 : tensor, // CHECK-LABEL: @add_transpose_mul_fusion func @add_transpose_mul_fusion(%arg0: tensor, %arg1 : tensor, %arg2 : tensor) -> tensor { - %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors - %1 = addf %arg3, %arg4 : f32 - linalg.yield %1 : f32 + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = dim %arg0, %c0 : tensor + %1 = dim %arg0, %c1 : tensor + %2 = linalg.init_tensor [%0, %1] : tensor + %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} + ins(%arg0, %arg1 : tensor, tensor) + outs(%2 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + %4 = addf %arg3, %arg4 : f32 + linalg.yield %4 : f32 } -> tensor // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} - %2 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%0, %arg2 : tensor, tensor) { - ^bb0(%arg5: f32, %arg6: f32): // no predecessors - %3 = mulf %arg5, %arg6 : f32 - linalg.yield %3 : f32 + %4 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} + ins(%3, %arg2 : tensor, tensor) + outs(%2 : tensor){ + ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors + %5= mulf %arg5, %arg6 : f32 + linalg.yield %5 : f32 } -> tensor - return %2 : tensor + return %4 : tensor } // ----- @@ -96,21 +117,29 @@ func @add_transpose_mul_fusion(%arg0: tensor, %arg1 : tensor, // CHECK-LABEL: 
@add_broadcast_mul_fusion func @add_broadcast_mul_fusion(%arg0: tensor, %arg1 : tensor, %arg2 : tensor) -> tensor { - %0 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors - %1 = addf %arg3, %arg4 : f32 - linalg.yield %1 : f32 + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = dim %arg0, %c0 : tensor + %1 = linalg.init_tensor [%0] : tensor + %2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]} + ins(%arg0, %arg1 : tensor, tensor) + outs(%1 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + %3 = addf %arg3, %arg4 : f32 + linalg.yield %3 : f32 } -> tensor // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP1]], [[$MAP0]], [[$MAP0]] - %2 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%0, %arg2 : tensor, tensor) { - ^bb0(%arg5: f32, %arg6: f32): // no predecessors - %3 = mulf %arg5, %arg6 : f32 - linalg.yield %3 : f32 + %3 = dim %arg2, %c1 : tensor + %4 = linalg.init_tensor [%0, %3] : tensor + %5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} + ins(%2, %arg2 : tensor, tensor) + outs(%4 : tensor){ + ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors + %6 = mulf %arg5, %arg6 : f32 + linalg.yield %6 : f32 } -> tensor - return %2 : tensor + return %5 : tensor } // ----- @@ -121,23 +150,26 @@ func @add_broadcast_mul_fusion(%arg0: tensor, %arg1 : tensor, %arg // CHECK-LABEL: @add_mul_scalar_fusion func @add_mul_scalar_fusion(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { - %0 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors - %1 = addf %arg3, %arg4 : f32 - linalg.yield %1 : f32 + %0 = linalg.init_tensor [] : tensor + %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} + ins(%arg0, %arg1 : tensor, tensor) + outs(%0 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + %2 = addf %arg3, %arg4 : f32 + linalg.yield %2 : f32 } -> tensor // CHECK: linalg.generic { // CHECK: addf // CHECK: mulf - %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} - ins(%0, %arg2 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors - %1 = mulf %arg3, %arg4 : f32 - linalg.yield %1 : f32 + %2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} + ins(%1, %arg2 : tensor, tensor) + outs(%0 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + %3 = mulf %arg3, %arg4 : f32 + linalg.yield %3 : f32 } -> tensor - return %1 : tensor + return %2 : tensor } // ----- @@ -146,22 +178,29 @@ func @add_mul_scalar_fusion(%arg0: tensor, %arg1: tensor, %arg2: tenso #map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func @generic_op_constant_fusion(%arg0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32> { - %0 = constant dense<42.0> : tensor<5xf32> - %1 = linalg.generic { - indexing_maps = [#map0, #map1, #map1], - iterator_types = ["parallel", "parallel", "parallel"]} - ins(%0, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - %2 = mulf %arg1, %arg2 : f32 - linalg.yield %2 : f32 - } -> tensor<5x?x?xf32> - return %1 : tensor<5x?x?xf32> + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : 
index + %cst = constant dense<42.0> : tensor<5xf32> + %0 = dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> + %3 = linalg.generic { + indexing_maps = [#map0, #map1, #map1], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%cst, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) + outs(%2 : tensor<5x?x?xf32>) { + ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): + %4 = mulf %arg1, %arg2 : f32 + linalg.yield %4 : f32 + } -> tensor<5x?x?xf32> + return %3 : tensor<5x?x?xf32> } // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-LABEL: func @generic_op_constant_fusion // CHECK: %[[CST:.*]] = constant {{.*}} : f32 // CHECK: linalg.generic -// CHECK: ^{{.*}}(%[[ARG1:.*]]: f32) +// CHECK: ^{{.+}}(%[[ARG1:[a-zA-Z0-9_]+]]: f32, %{{.+}}: f32): // CHECK: mulf %[[CST]], %[[ARG1]] // ----- @@ -171,16 +210,23 @@ func @generic_op_constant_fusion(%arg0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32> func @indexed_generic_op_constant_fusion(%arg0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32> { - %0 = constant dense<42.0> : tensor<5xf32> - %1 = linalg.indexed_generic { - indexing_maps = [#map0, #map1, #map1], - iterator_types = ["parallel", "parallel", "parallel"]} - ins(%0, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: f32, %arg5 : f32): - %2 = mulf %arg4, %arg5 : f32 - linalg.yield %2 : f32 - } -> tensor<5x?x?xf32> - return %1 : tensor<5x?x?xf32> + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %cst = constant dense<42.0> : tensor<5xf32> + %0 = dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> + %3 = linalg.indexed_generic { + indexing_maps = [#map0, #map1, #map1], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%cst, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) + outs(%2 : tensor<5x?x?xf32>) { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: f32, %arg5 : f32, %arg6 : f32): + %4 = mulf %arg4, %arg5 : f32 + linalg.yield %4 : f32 + } -> tensor<5x?x?xf32> + return %3 : tensor<5x?x?xf32> } // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-LABEL: func @indexed_generic_op_constant_fusion @@ -190,7 +236,7 @@ func @indexed_generic_op_constant_fusion(%arg0 : tensor<5x?x?xf32>) // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: index // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: index -// CHECK-SAME: %[[ARG4:.*]]: f32) +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]*]]: f32, %{{.*}}: f32) // CHECK: mulf %[[CST]], %[[ARG4]] // ----- @@ -200,22 +246,29 @@ func @indexed_generic_op_constant_fusion(%arg0 : tensor<5x?x?xf32>) func @generic_op_zero_dim_constant_fusion(%arg0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32> { - %0 = constant dense<42.0> : tensor - %1 = linalg.generic { - indexing_maps = [#map0, #map1, #map1], - iterator_types = ["parallel", "parallel", "parallel"]} - ins(%0, %arg0 : tensor, tensor<5x?x?xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - %2 = mulf %arg1, %arg2 : f32 - linalg.yield %2 : f32 - } -> tensor<5x?x?xf32> - return %1 : tensor<5x?x?xf32> + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %cst = constant dense<42.0> : tensor + %0 = dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> + %3 = linalg.generic { + indexing_maps = [#map0, #map1, #map1], + 
iterator_types = ["parallel", "parallel", "parallel"]} + ins(%cst, %arg0 : tensor, tensor<5x?x?xf32>) + outs(%2 : tensor<5x?x?xf32>) { + ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): + %4 = mulf %arg1, %arg2 : f32 + linalg.yield %4 : f32 + } -> tensor<5x?x?xf32> + return %3 : tensor<5x?x?xf32> } // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-LABEL: func @generic_op_zero_dim_constant_fusion // CHECK: %[[CST:.*]] = constant {{.*}} : f32 // CHECK: linalg.generic -// CHECK: ^{{.*}}(%[[ARG1:.*]]: f32) +// CHECK: ^{{.*}}(%[[ARG1:[a-zA-Z0-9_]*]]: f32, %{{.*}}: f32) // CHECK: mulf %[[CST]], %[[ARG1]] // ----- @@ -225,16 +278,23 @@ func @generic_op_zero_dim_constant_fusion(%arg0 : tensor<5x?x?xf32>) func @indexed_generic_op_zero_dim_constant_fusion (%arg0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32> { - %0 = constant dense<42.0> : tensor - %1 = linalg.indexed_generic { - indexing_maps = [#map0, #map1, #map1], - iterator_types = ["parallel", "parallel", "parallel"]} - ins(%0, %arg0 : tensor, tensor<5x?x?xf32>) { - ^bb0(%arg1 : index, %arg2 : index, %arg3 : index, %arg4: f32, %arg5: f32): - %2 = mulf %arg4, %arg5 : f32 - linalg.yield %2 : f32 - } -> tensor<5x?x?xf32> - return %1 : tensor<5x?x?xf32> + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %cst = constant dense<42.0> : tensor + %0 = dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> + %3 = linalg.indexed_generic { + indexing_maps = [#map0, #map1, #map1], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%cst, %arg0 : tensor, tensor<5x?x?xf32>) + outs(%2 : tensor<5x?x?xf32>) { + ^bb0(%arg1 : index, %arg2 : index, %arg3 : index, %arg4: f32, %arg5: f32, %arg6: f32): + %4 = mulf %arg4, %arg5 : f32 + linalg.yield %4 : f32 + } -> tensor<5x?x?xf32> + return %3 : tensor<5x?x?xf32> } // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-LABEL: func @indexed_generic_op_zero_dim_constant_fusion @@ -244,7 +304,7 @@ func @indexed_generic_op_zero_dim_constant_fusion // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: index // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: index -// CHECK-SAME: %[[ARG4:.*]]: f32) +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]*]]: f32, %{{.*}}: f32) // CHECK: mulf %[[CST]], %[[ARG4]] // ----- @@ -252,26 +312,33 @@ func @indexed_generic_op_zero_dim_constant_fusion #map0 = affine_map<(d0, d1) -> (d0, d1)> func @generic_op_indexed_generic_op_fusion(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = linalg.generic { - indexing_maps = [#map0, #map0, #map0], - iterator_types = ["parallel", "parallel"] } - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg2: i32, %arg3: i32): // no predecessors + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = dim %arg0, %c0 : tensor + %1 = dim %arg0, %c1 : tensor + %2 = linalg.init_tensor [%0, %1] : tensor + %3 = linalg.generic { + indexing_maps = [#map0, #map0, #map0], + iterator_types = ["parallel", "parallel"] } + ins(%arg0, %arg1 : tensor, tensor) + outs(%2 : tensor) { + ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): // no predecessors %10 = addi %arg2, %arg3 : i32 linalg.yield %10 : i32 } -> tensor - %1 = linalg.indexed_generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel", "parallel"] } - ins(%0 : tensor) { - ^bb0(%arg2: index, %arg3: index, %arg4: i32): // no predecessors - %2 = index_cast %arg2 : index to i32 - %3 = index_cast %arg3 : index to i32 - %4 = addi %arg4, %2 : i32 - 
%5 = subi %4, %3 : i32 - linalg.yield %5 : i32 + %4 = linalg.indexed_generic { + indexing_maps = [#map0, #map0], + iterator_types = ["parallel", "parallel"] } + ins(%3 : tensor) + outs(%2 : tensor) { + ^bb0(%arg2: index, %arg3: index, %arg4: i32, %arg5: i32): // no predecessors + %5 = index_cast %arg2 : index to i32 + %6 = index_cast %arg3 : index to i32 + %7 = addi %arg4, %5 : i32 + %8 = subi %7, %6 : i32 + linalg.yield %8 : i32 } -> tensor - return %1 : tensor + return %4 : tensor } // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @generic_op_indexed_generic_op_fusion @@ -295,26 +362,33 @@ func @generic_op_indexed_generic_op_fusion(%arg0: tensor, #map0 = affine_map<(d0, d1) -> (d0, d1)> func @indexed_generic_op_generic_op_fusion(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = linalg.indexed_generic { + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = dim %arg0, %c0 : tensor + %1 = dim %arg0, %c1 : tensor + %2 = linalg.init_tensor [%0, %1] : tensor + %3 = linalg.indexed_generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } - ins(%arg0 : tensor) { - ^bb0(%arg2: index, %arg3: index, %arg4: i32): // no predecessors - %2 = index_cast %arg2 : index to i32 - %3 = index_cast %arg3 : index to i32 - %4 = addi %arg4, %2 : i32 - %5 = subi %4, %3 : i32 - linalg.yield %5 : i32 - } -> tensor - %1 = linalg.generic { + ins(%arg0 : tensor) + outs(%2 : tensor) { + ^bb0(%arg2: index, %arg3: index, %arg4: i32, %arg5: i32): // no predecessors + %4 = index_cast %arg2 : index to i32 + %5 = index_cast %arg3 : index to i32 + %6 = addi %arg4, %4 : i32 + %7 = subi %6, %5 : i32 + linalg.yield %7 : i32 + } -> tensor + %4 = linalg.generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } - ins(%0, %arg1 : tensor, tensor) { - ^bb0(%arg2: i32, %arg3: i32): // no predecessors - %10 = addi %arg2, %arg3 : i32 - linalg.yield %10 : i32 - } -> tensor - return %1 : tensor + ins(%3, %arg1 : tensor, tensor) + outs(%2 : tensor) { + ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): // no predecessors + %10 = addi %arg2, %arg3 : i32 + linalg.yield %10 : i32 + } -> tensor + return %4 : tensor } // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @indexed_generic_op_generic_op_fusion @@ -339,29 +413,36 @@ func @indexed_generic_op_generic_op_fusion(%arg0: tensor, #map0 = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1) -> (d0, d1)> func @indexed_generic_op_fusion(%arg0: tensor) -> tensor { - %0 = linalg.indexed_generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel", "parallel"] } - ins(%arg0 : tensor) { - ^bb0(%arg2: index, %arg3: index, %arg4: i32): // no predecessors - %2 = index_cast %arg2 : index to i32 - %3 = index_cast %arg3 : index to i32 - %4 = addi %arg4, %2 : i32 - %5 = subi %4, %3 : i32 - linalg.yield %5 : i32 + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = dim %arg0, %c0 : tensor + %1 = dim %arg0, %c1 : tensor + %2 = linalg.init_tensor [%0, %1] : tensor + %3 = linalg.indexed_generic { + indexing_maps = [#map0, #map0], + iterator_types = ["parallel", "parallel"] } + ins(%arg0 : tensor) + outs(%2 : tensor) { + ^bb0(%arg2: index, %arg3: index, %arg4: i32, %arg5: i32): // no predecessors + %4 = index_cast %arg2 : index to i32 + %5 = index_cast %arg3 : index to i32 + %6 = addi %arg4, %4 : i32 + %7 = subi %5, %6 : i32 + linalg.yield %7 : i32 } -> tensor - %1 = linalg.indexed_generic { - indexing_maps = [#map1, #map1], - iterator_types = ["parallel", 
"parallel"] } - ins(%0 : tensor) { - ^bb0(%arg2: index, %arg3: index, %arg4: i32): // no predecessors - %2 = index_cast %arg2 : index to i32 - %3 = index_cast %arg3 : index to i32 - %4 = addi %arg4, %2 : i32 - %5 = subi %4, %3 : i32 - linalg.yield %5 : i32 + %4= linalg.indexed_generic { + indexing_maps = [#map1, #map1], + iterator_types = ["parallel", "parallel"] } + ins(%3 : tensor) + outs(%2 : tensor) { + ^bb0(%arg2: index, %arg3: index, %arg4: i32, %arg5: i32): // no predecessors + %5 = index_cast %arg2 : index to i32 + %6 = index_cast %arg3 : index to i32 + %7 = addi %arg4, %5 : i32 + %8 = subi %7, %6 : i32 + linalg.yield %8 : i32 } -> tensor - return %1 : tensor + return %4 : tensor } // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @indexed_generic_op_fusion @@ -374,7 +455,7 @@ func @indexed_generic_op_fusion(%arg0: tensor) -> tensor { // CHECK: %[[ADD_OPERAND1:.+]] = index_cast %[[ARG1]] : index to i32 // CHECK: %[[SUB_OPERAND1:.+]] = index_cast %[[ARG0]] : index to i32 // CHECK: %[[VAL1:.+]] = addi %[[ARG2]], %[[ADD_OPERAND1]] : i32 -// CHECK: %[[VAL2:.+]] = subi %[[VAL1]], %[[SUB_OPERAND1]] : i32 +// CHECK: %[[VAL2:.+]] = subi %[[SUB_OPERAND1]], %[[VAL1]] : i32 // CHECK: %[[ADD_OPERAND2:.+]] = index_cast %[[ARG0]] : index to i32 // CHECK: %[[SUB_OPERAND2:.+]] = index_cast %[[ARG1]] : index to i32 // CHECK: %[[VAL3:.+]] = addi %[[VAL2]], %[[ADD_OPERAND2]] : i32 @@ -389,25 +470,27 @@ func @scalar_indexed_generic_fusion { %c0 = constant 0 : index %cst = constant dense<1.000000e+00> : tensor<10xf32> - %0 = linalg.indexed_generic + %0 = linalg.init_tensor [] : tensor + %1 = linalg.indexed_generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} - ins(%arg1 : tensor) { - ^bb0(%arg2: i32): // no predecessors + ins(%arg1 : tensor) outs(%0 : tensor) { + ^bb0(%arg2: i32, %arg3: f32): // no predecessors %3 = index_cast %arg2 : i32 to index %4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32> linalg.yield %4 : f32 } -> tensor - %1 = linalg.generic + %2 = linalg.init_tensor [10] : tensor<10xf32> + %3 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - ins(%0, %cst : tensor, tensor<10xf32>) { - ^bb0(%arg2: f32, %arg3: f32): // no predecessors - %3 = mulf %arg2, %arg3 : f32 - linalg.yield %3 : f32 + ins(%1, %cst : tensor, tensor<10xf32>) outs(%2 : tensor<10xf32>) { + ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): // no predecessors + %4 = mulf %arg2, %arg3 : f32 + linalg.yield %4 : f32 } -> tensor<10xf32> - return %1 : tensor<10xf32> + return %3 : tensor<10xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> ()> // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0)> @@ -421,3 +504,35 @@ func @scalar_indexed_generic_fusion // CHECK: tensor.extract %[[ARG0]] // CHECK: linalg.yield // CHECK return %[[T0]] + +// ----- + +func @constant_fusion(%arg0 : tensor<4xf32>) -> (tensor<4xf32>) { + %cst = constant dense<1.0> : tensor<4xf32> + %1 = linalg.init_tensor [4] : tensor<4xf32> + %2 = linalg.generic + {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, + affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"]} + ins (%arg0, %cst : tensor<4xf32>, tensor<4xf32>) + outs (%1 : tensor<4xf32>) { + ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): + %3 = addf %arg1, %arg2 : f32 + linalg.yield %3 : f32 + } -> tensor<4xf32> + return %2 : tensor<4xf32> +} + +// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0) -> (d0)> +// CHECK: func 
@constant_fusion(%[[ARG0:.+]]: tensor<4xf32>) +// CHECK-DAG: %[[CST:.+]] = constant 1.000000e+00 : f32 +// CHECK-DAG: %[[T0:.+]] = linalg.init_tensor [4] : tensor<4xf32> +// CHECK: %[[T1:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] +// CHECK-SAME: ins(%[[ARG0]] : tensor<4xf32>) +// CHECK-SAME: outs(%[[T0]] : tensor<4xf32>) +// CHECK: ^{{.+}}( +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: f32, %[[ARG2:[a-zA-Z0-9_]+]]: f32) +// CHECK: %[[T2:.+]] = addf %[[ARG1]], %[[CST]] +// CHECK: linalg.yield %[[T2]] +// CHECK: return %[[T1]] diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir index 6db48af3b5730..c9f24844662fb 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -28,7 +28,8 @@ func @generalize_conv(%input : memref<1x225x225x3xf32>, %filter: memref<3x3x3x32 // ----- func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: memref<16x32xf32>) { - linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>) outs(%C: memref<16x32xf32>) + linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>) + outs(%C: memref<16x32xf32>) return } @@ -45,7 +46,7 @@ func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[A_MAP]], #[[B_MAP]], #[[C_MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] -// CHECK-SAME: ins(%[[A]], %[[B]] +// CHECK-SAME: ins(%[[A]], %[[B]] // CHECK-SAME: outs(%[[C]] // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) @@ -56,15 +57,16 @@ func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: // ----- func @generalize_matmul_tensor(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) init(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) + outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } // CHECK: func @generalize_matmul_tensor // CHECK: linalg.generic -// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>) -// CHECK-SAME: init(%{{.+}} : tensor<16x32xf32>) +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>) +// CHECK-SAME: outs(%{{.+}} : tensor<16x32xf32>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = mulf %[[A_ARG]], %[[B_ARG]] : f32 diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 8e98a80e77b19..95a663d19f0da 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -77,7 +77,7 @@ func @generic_wrong_dim_in_map(%arg0: memref<1xi32>) { // ----- func @generic_one_d_view(%arg0: memref(off + i)>>) { - // expected-error @+1 {{op expected indexing_map #0 results to match view rank: 'memref (d0 + s0)>>'}} + // expected-error @+1 {{expected shaped value rank (1) to match the result rank of indexing_map #0 (2)}} linalg.generic { indexing_maps = [ affine_map<() -> (0, 0)> ], iterator_types = []} @@ -143,9 +143,9 @@ func @generic_empty_region(%arg0: memref) { func @generic_empty_region(%arg0: memref) { %f0 = constant 0.0: f32 - // expected-error @+1 {{linalg.generic' op expected region with 1 block}} + // expected-error @+1 {{linalg.generic' op expected 1 region 
with 1 block}} linalg.generic { - indexing_maps = [ affine_map<() -> (0)> ], + indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ], iterator_types = []} ins(%arg0 : memref) outs(%arg0 : memref) { @@ -155,12 +155,12 @@ func @generic_empty_region(%arg0: memref) { // ----- func @generic_mismatched_num_arguments(%arg0: memref) { - // expected-error @+1 {{op expected number of block arguments to match number of operands}} + // expected-error @+1 {{expected as many non-induction variable region arguments as the number of shaped operands}} linalg.generic { - indexing_maps = [ affine_map<() -> (0)> ], + indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { - ^bb(%f: f32, %g: f32): + outs(%arg0, %arg0 : memref, memref) { + ^bb(%f: f32): linalg.yield %f: f32 } } @@ -168,9 +168,9 @@ func @generic_mismatched_num_arguments(%arg0: memref) { // ----- func @generic_block_arg_type(%arg0: memref) { - // expected-error @+1 {{op expected block argument 1 of the same type as elemental type of output operand: 'memref'}} + // expected-error @+1 {{expected type of bb argument #0 ('i1') to match element type of corresponding shaped operand ('f32')}} linalg.generic { - indexing_maps = [ affine_map<() -> (0)> ], + indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} outs(%arg0 : memref) { ^bb(%i: i1): @@ -180,12 +180,12 @@ func @generic_block_arg_type(%arg0: memref) { // ----- -func @indexed_generic_block_arg_count(%arg0: memref) { - // expected-error @+1 {{op expected number of block arguments to match number of operands + number of loops}} +func @indexed_generic_block_arg_count(%arg0: memref) { + // expected-error @+1 {{expected as many non-induction variable region arguments as the number of shaped operands}} linalg.indexed_generic { - indexing_maps = [ affine_map<(d0) -> (d0)> ], + indexing_maps = [ affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref) { + outs(%arg0 : memref) { ^bb(%f: f32): linalg.yield %f : f32 } @@ -193,12 +193,12 @@ func @indexed_generic_block_arg_count(%arg0: memref) { // ----- -func @indexed_generic_block_induction_var_arg_type(%arg0: memref) { - // expected-error @+1 {{op expected block argument 1 to be an index}} +func @indexed_generic_block_induction_var_arg_type(%arg0: memref) { + // expected-error @+1 {{op expected index block argument #0}} linalg.indexed_generic { indexing_maps = [ affine_map<(d0) -> (d0)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref) { + outs(%arg0 : memref) { ^bb(%i: f64, %f: f32): linalg.yield %f: f32 } @@ -206,12 +206,12 @@ func @indexed_generic_block_induction_var_arg_type(%arg0: memref) { // ----- -func @indexed_generic_block_arg_type(%arg0: memref) { - // expected-error @+1 {{op expected block argument 2 of the same type as elemental type of output operand: 'memref'}} +func @indexed_generic_block_arg_type(%arg0: memref) { + // expected-error @+1 {{expected type of bb argument #1 ('i1') to match element type of corresponding shaped operand ('f32')}} linalg.indexed_generic { indexing_maps = [ affine_map<(d0) -> (d0)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref) { + outs(%arg0 : memref) { ^bb(%i: index, %f: i1): linalg.yield %i: index } @@ -220,7 +220,7 @@ func @indexed_generic_block_arg_type(%arg0: memref) { // ----- func @indexed_generic_arg_count(%arg0: memref) { - // expected-error @+1 {{op expected number of block arguments to match number of operands + number of loops}} + // expected-error @+1 {{expected as many non-induction 
variable region arguments as the number of shaped operands}} linalg.indexed_generic { indexing_maps = [ affine_map<()[] -> ()> ], iterator_types = []} @@ -233,19 +233,6 @@ func @indexed_generic_arg_count(%arg0: memref) { // ----- -func @indexed_generic_induction_var_arg_type(%arg0: memref) { - // expected-error @+1 {{op expected block argument 1 to be an index}} - linalg.indexed_generic { - iterator_types = ["parallel"], - indexing_maps = [ affine_map<(i) -> (i)> ]} - outs(%arg0 : memref) { - ^bb(%0: i32, %1: f32): - linalg.yield %1: f32 - } -} - -// ----- - func @indexed_generic_result_count(%arg0: memref) { // expected-error @+6 {{op expected number of yield values (1) to match the number of operands of the enclosing LinalgOp (2)}} linalg.indexed_generic { @@ -273,19 +260,36 @@ func @generic_result_0_element_type(%arg0: memref(o // ----- -func @generic_result_tensor_type(%arg0: memref(off + i)>>) { - // expected-error @+1 {{op result #0 must be ranked tensor of any type values, but got 'f32'}} +func @generic_result_tensor_type(%arg0: memref(off + i)>>, + %arg1: tensor) { + // expected-error @+1 {{expected type of operand #1 ('tensor') to match type of corresponding result ('f32')}} %0 = linalg.generic { - indexing_maps = [ affine_map<(i) -> (i)> ], + indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} - ins(%arg0 : memref(off + i)>>) { - ^bb(%i: f32): + ins(%arg0 : memref(off + i)>>) + outs(%arg1 : tensor) { + ^bb(%i: f32, %j: f32): linalg.yield %i: f32 } -> f32 } // ----- +func @generic_result_tensor_type(%arg0: memref(off + i)>>, + %arg1: tensor) { + // expected-error @+1 {{unexpected output tensor expression in indexing map #0 a.k.a 'd0' is function of reduction iterator 'd0'}} + %0 = linalg.generic { + indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ], + iterator_types = ["reduction"]} + ins(%arg0 : memref(off + i)>>) + outs(%arg1 : tensor) { + ^bb(%i: f32, %j: f32): + linalg.yield %i: f32 + } -> tensor +} + +// ----- + func @generic(%arg0: memref) { // expected-error @+2 {{op expects regions to end with 'linalg.yield', found 'std.addf'}} // expected-note @+1 {{in custom textual format, the absence of terminator implies 'linalg.yield'}} @@ -301,12 +305,17 @@ func @generic(%arg0: memref) { // ----- -func @conv_rank_limit(%arg0: memref, %arg1: memref, %arg2: memref) { - // expected-error @+1 {{expects memref ranks to be greater than 2}} - linalg.conv(%arg0, %arg1, %arg2) : memref, memref, memref -} - -// ----- +// This test is currently disabled: subject to verifier ordering issues. +// Instead, when the ranks are not greater than 2, an assertion will be triggered +// in LinalgStructuredOps.td::ConvOp::iterator_types() for now because the +// verifier inspects the iterator_types. This is slated to become an +// autogenerated op in the future, alleviating the issue. 
+// func @conv_rank_limit(%arg0: memref, %arg1: memref, %arg2: memref) { +// // DISABLED_expected -error @+1 {{expects memref ranks to be greater than 2}} +// linalg.conv(%arg0, %arg1, %arg2) : memref, memref, memref +// } +// +// // ----- // expected-error @+1 {{unknown Linalg type}} !invalid_type = type !linalg.unknown @@ -367,7 +376,7 @@ func @reshape(%arg0: memref) { func @pooling_rank_mismatch(%arg0: memref, %arg1: memref<2x3xf32>, %arg2: memref) { - // expected-error @+1 {{expects memref ranks to match}} + // expected-error @+1 {{expected shaped value rank (2) to match the result rank of indexing_map #1 (3)}} linalg.pooling_max(%arg0, %arg1, %arg2) {strides = [2, 1, 2]}: memref, memref<2x3xf32>, memref return @@ -376,7 +385,7 @@ func @pooling_rank_mismatch(%arg0: memref, // ----- func @named_ops(%a3: memref, %b3: memref, %c3: memref) { - // expected-error @+1 {{op expected indexing_map #1 results to match view rank: 'memref'}} + // expected-error @+1 {{expected shaped value rank (2) to match the result rank of indexing_map #1 (3)}} linalg.batch_matmul ins(%a3, %b3: memref, memref) outs(%c3 : memref) return @@ -384,18 +393,8 @@ func @named_ops(%a3: memref, %b3: memref, %c3: memref, %t: tensor) { - // expected-error @+1 {{expected empty `init` when op has no results or no reduction dims}} - linalg.matmul ins(%m, %m: memref, memref) - outs(%m : memref) - init(%t : tensor) - return -} - -// ----- - func @incorrect_region_arg_count(%m: memref) { - // expected-error @+3 {{region expects 3 args, got 4}} + // expected-error @+3 {{region expects 3 args, got 2}} %res = linalg.matmul ins(%m, %m : memref, memref) -> tensor, tensor return @@ -403,30 +402,10 @@ func @incorrect_region_arg_count(%m: memref) { // ----- -func @single_tensor_result(%m: memref, %t: tensor) { - // expected-error @+1 {{expected single tensor result when reduction present}} - %res:2 = linalg.matmul ins(%m : memref) - init(%t, %t : tensor, tensor) - -> tensor, tensor - return -} - -// ----- - -func @matching_inits(%m: memref, %t: tensor) { - // expected-error @+1 {{expected #init tensors to match #results when reduction present}} - %res = linalg.matmul ins(%m, %m : memref, memref) - init(%t, %t : tensor, tensor) - -> tensor - return -} - -// ----- - func @matching_inits(%m: memref, %t: tensor) { - // expected-error @+1 {{expected init tensor #0 of the same type as result #0}} + // expected-error @+1 {{expected type of operand #2 ('tensor') to match type of corresponding result ('tensor')}} %res = linalg.matmul ins(%m, %m : memref, memref) - init(%t : tensor) + outs(%t : tensor) -> tensor return } diff --git a/mlir/test/Dialect/Linalg/parallel-loops.mlir b/mlir/test/Dialect/Linalg/parallel-loops.mlir index 95eb997f4dbda..8d365af6a5a38 100644 --- a/mlir/test/Dialect/Linalg/parallel-loops.mlir +++ b/mlir/test/Dialect/Linalg/parallel-loops.mlir @@ -64,7 +64,7 @@ func @lower_outer_parallel(%A: memref, %B: memref) { #accesses = [ affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>, - affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d4, d5)> + affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3)> ] #trait = { iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"], @@ -94,4 +94,4 @@ func @lower_mixed_parallel(%A: memref, %B: memref) // CHECK: scf.parallel (%[[IV3:.*]], %[[IV4:.*]]) = (%[[C0]], %[[C0]]) to (%[[D3]], %[[D4]]) step (%[[C1]], %[[C1]]) // CHECK: scf.for %[[IV5:.*]] = %[[C0]] to %[[D5]] step %[[C1]] // CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], 
%[[IV4]], %[[IV5]]] -// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV2]], %[[IV4]], %[[IV5]]] +// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV4]], %[[IV3]]] diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_fusion.mlir index 66e07cc56d65b..92805218dde7d 100644 --- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir +++ b/mlir/test/Dialect/Linalg/reshape_fusion.mlir @@ -1,20 +1,21 @@ -// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file | FileCheck %s +// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file -verify-each=0 | FileCheck %s #map0 = affine_map<(d0, d1, d2) -> (d2, d0, d1)> #map1 = affine_map<(d0, d1, d2) -> (d1, d2, d0)> -func @generic_op_reshape_producer_fusion(%arg0 : tensor, +func @generic_op_reshape_producer_fusion(%arg0 : tensor, %arg1 : tensor) -> tensor { %0 = linalg.tensor_reshape %arg0 [affine_map<(i, j, k, l) -> (i)>, affine_map<(i, j, k, l) -> (j, k)>, affine_map<(i, j, k, l) -> (l)>] : - tensor into tensor + tensor into tensor %1 = linalg.generic { indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors + ins(%0, %arg1 : tensor, tensor) + outs(%0 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %s: f32): // no predecessors %1 = mulf %arg3, %arg4 : f32 linalg.yield %1 : f32 } -> tensor @@ -22,44 +23,58 @@ func @generic_op_reshape_producer_fusion(%arg0 : tensor, } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d1)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d2, d3)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d2)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d1)> +// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d2, d3)> +// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> +// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)> // CHECK: func @generic_op_reshape_producer_fusion -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor -// CHECK: %[[T0:.+]] = linalg.tensor_reshape %[[ARG1]] +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C2:.+]] = constant 2 : index +// CHECK-DAG: %[[C4:.+]] = constant 4 : index +// CHECK: %[[T0:.+]] = linalg.tensor_reshape %[[ARG0]] // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: tensor into tensor -// CHECK: %[[T1:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP3]], #[[MAP4]], #[[MAP4]]] +// CHECK: %[[T1:.+]] = linalg.tensor_reshape %[[ARG1]] +// CHECK-SAME: [#[[MAP0]], #[[MAP3]], #[[MAP4]]] +// CHECK-DAG: %[[D0:.+]] = dim %[[T0]], %[[C0]] +// CHECK-DAG: %[[D1:.+]] = dim %[[T0]], %[[C1]] +// CHECK-DAG: %[[D2:.+]] = dim %[[T0]], %[[C2]] +// CHECK: %[[D3:.+]] = divi_unsigned %[[D0]], %[[C4]] +// CHECK: %[[T2:.+]] = linalg.init_tensor [%[[D1]], %[[D2]], %[[D3]], 4] +// CHECK: %[[T3:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[MAP5]], #[[MAP6]], #[[MAP6]]] // CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: 
ins(%[[ARG0]], %[[T0]] : tensor, tensor) -// CHECK: %[[T2:.+]] = linalg.tensor_reshape -// CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: tensor into tensor -// CHECK: return %[[T2]] +// CHECK-SAME: ins(%[[ARG0]], %[[T1]] : tensor, tensor) +// CHECK-SAME: outs(%[[T2]] : tensor) +// CHECK: %[[T4:.+]] = linalg.tensor_reshape %[[T3]] +// CHECK-SAME: [#[[MAP0]], #[[MAP3]], #[[MAP4]]] +// CHECK-SAME: tensor into tensor +// CHECK: return %[[T4]] // ----- #map0 = affine_map<(d0, d1) -> (d0, d1)> func @generic_op_reshape_consumer_fusion(%arg0 : tensor, %arg1 : tensor) -> - tensor + tensor { %0 = linalg.generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors + ins(%arg0, %arg1 : tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %s: f32): // no predecessors %1 = mulf %arg3, %arg4 : f32 linalg.yield %1 : f32 } -> tensor %1 = linalg.tensor_reshape %0 [affine_map<(i, j, k, l) -> (i)>, affine_map<(i, j, k, l) -> (j, k, l)>] : - tensor into tensor - return %1 : tensor + tensor into tensor + return %1 : tensor } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0)> @@ -68,31 +83,40 @@ func @generic_op_reshape_consumer_fusion(%arg0 : tensor, // CHECK: func @generic_op_reshape_consumer_fusion // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C20:.+]] = constant 20 : index // CHECK: %[[T0:.+]] = linalg.tensor_reshape %[[ARG0]] // CHECK-SAME: [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: tensor into tensor +// CHECK-SAME: tensor into tensor // CHECK: %[[T1:.+]] = linalg.tensor_reshape %[[ARG1]] // CHECK-SAME: [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: tensor into tensor -// CHECK: %[[T2:.+]] = linalg.generic +// CHECK-SAME: tensor into tensor +// CHECK-DAG: %[[D0:.+]] = dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[D1:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[D2:.+]] = divi_unsigned %[[D1]], %[[C20]] +// CHECK: %[[T2:.+]] = linalg.init_tensor [%[[D0]], 4, %[[D2]], 5] +// CHECK: %[[T3:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP2]]] // CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor, tensor) -// CHECK: return %[[T2]] : tensor +// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor, tensor) +// CHECK-SAME: outs(%[[T2]] : tensor) +// CHECK: return %[[T3]] : tensor // ----- func @reshape_as_consumer_permutation (%a : tensor, %b : tensor) - -> tensor { + -> tensor { %c = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d2, d1)>], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%a, %b : tensor, tensor) { - ^bb0(%arg0 : f32, %arg1: f32): + ins(%a, %b : tensor, tensor) + outs(%a : tensor) { + ^bb0(%arg0 : f32, %arg1: f32, %s: f32): %1 = addf %arg0, %arg1 : f32 linalg.yield %1 : f32 } -> tensor @@ -100,8 +124,8 @@ func @reshape_as_consumer_permutation [affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d2)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)>] - : tensor into tensor - return %d : tensor + : tensor into tensor + return %d : tensor } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, 
d1, d2, d3, d4, d5) -> (d3, d4)>
@@ -114,17 +138,28 @@ func @reshape_as_consumer_permutation
// CHECK: func @reshape_as_consumer_permutation
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor
+// CHECK-DAG: %[[C0:.+]] = constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = constant 1 : index
+// CHECK-DAG: %[[C2:.+]] = constant 2 : index
+// CHECK-DAG: %[[C12:.+]] = constant 12 : index
// CHECK: %[[T0:.+]] = linalg.tensor_reshape %[[ARG0]]
// CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
-// CHECK-SAME: tensor into tensor
+// CHECK-SAME: tensor into tensor<3x4x?x?x2x?xf32>
// CHECK: %[[T1:.+]] = linalg.tensor_reshape %[[ARG1]]
// CHECK-SAME: [#[[MAP3]], #[[MAP4]]]
-// CHECK-SAME: tensor into tensor
-// CHECK: %[[T2:.+]] = linalg.generic
+// CHECK-SAME: tensor into tensor<3x4x?x?xf32>
+// CHECK-DAG: %[[D0:.+]] = dim %[[ARG0]], %[[C0]]
+// CHECK: %[[D1:.+]] = divi_unsigned %[[D0]], %[[C2]]
+// CHECK-DAG: %[[D2:.+]] = dim %[[ARG0]], %[[C2]]
+// CHECK-DAG: %[[D3:.+]] = dim %[[ARG0]], %[[C1]]
+// CHECK-DAG: %[[D4:.+]] = divi_unsigned %[[D3]], %[[C12]]
+// CHECK: %[[T2:.+]] = linalg.init_tensor [%[[D1]], 2, %[[D2]], 3, 4, %[[D4]]]
+// CHECK: %[[T3:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP5]], #[[MAP6]], #[[MAP7]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor, tensor)
-// CHECK: return %[[T2]] : tensor
+// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor<3x4x?x?x2x?xf32>, tensor<3x4x?x?xf32>)
+// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK: return %[[T3]] : tensor
// -----
@@ -138,8 +173,9 @@ func @generic_op_reshape_consumer_static(%arg0: tensor<264x4xf32>)
%0 = linalg.generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
- ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>) {
- ^bb0(%arg1: f32, %arg2: f32): // no predecessors
+ ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>)
+ outs(%arg0 : tensor<264x4xf32>) {
+ ^bb0(%arg1: f32, %arg2: f32, %s: f32): // no predecessors
%2 = mulf %arg1, %arg2 : f32
linalg.yield %2 : f32
} -> tensor<264x4xf32>
@@ -156,21 +192,27 @@ func @generic_op_reshape_consumer_static(%arg0: tensor<264x4xf32>)
// CHECK: %[[T0:.+]] = linalg.tensor_reshape %[[ARG0]]
// CHECK-SAME: [#[[MAP0]], #[[MAP1]]]
// CHECK-SAME: tensor<264x4xf32> into tensor<8x33x4xf32>
-// CHECK: %[[T1:.+]] = linalg.generic
+// CHECK: %[[T1:.+]] = linalg.init_tensor [8, 33, 4] : tensor<8x33x4xf32>
+// CHECK: %[[T2:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]]
// CHECK-SAME: ["parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]] : tensor<8x33x4xf32>)
-// CHECK: return %[[T1]] : tensor<8x33x4xf32>
+// CHECK-SAME: outs(%[[T1]] : tensor<8x33x4xf32>)
+// CHECK: return %[[T2]] : tensor<8x33x4xf32>
// -----
-func @scalar_reshape(%arg0 : tensor<1x10xf32>, %arg1 : tensor<1xf32>)
- -> tensor<1x10xf32> {
+func @scalar_reshape(
+ %arg0 : tensor<1x10xf32>, %arg1 : tensor<1xf32>, %shape : tensor<10xf32>)
+ -> tensor<1x10xf32>
+{
%0 = linalg.tensor_reshape %arg1 [] : tensor<1xf32> into tensor
%1 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>],
- iterator_types = ["parallel"]} ins(%0 : tensor) {
- ^bb0(%arg2: f32): // no predecessors
+ iterator_types = ["parallel"]}
+ ins(%0 : tensor)
+ outs(%shape : tensor<10xf32>) {
+ ^bb0(%arg2: f32, %s: f32): // no predecessors
linalg.yield %arg2 : f32
} -> tensor<10xf32>
%2 = linalg.tensor_reshape %1 [affine_map<(d0, d1)
-> (d0, d1)>] @@ -185,11 +227,13 @@ func @scalar_reshape(%arg0 : tensor<1x10xf32>, %arg1 : tensor<1xf32>) // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<1xf32> // CHECK: %[[T0:.+]] = linalg.tensor_reshape %[[ARG1]] [] // CHECK-SAME: tensor<1xf32> into tensor -// CHECK: %[[T1:.+]] = linalg.generic +// CHECK: %[[T1:.+]] = linalg.init_tensor [1, 10] : tensor<1x10xf32> +// CHECK: %[[T2:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[T0]] : tensor) -// CHECK: return %[[T1]] : tensor<1x10xf32> +// CHECK-SAME: outs(%[[T1]] : tensor<1x10xf32>) +// CHECK: return %[[T2]] : tensor<1x10xf32> // ----- @@ -206,8 +250,9 @@ func @indexed_generic_op_reshape_producer_fusion(%arg0 : tensor, %1 = linalg.indexed_generic { indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor, tensor) { - ^bb0(%arg3 : index, %arg4 : index, %arg5 : index, %arg6: i32, %arg7: i32): + ins(%0, %arg1 : tensor, tensor) + outs(%0 : tensor) { + ^bb0(%arg3 : index, %arg4 : index, %arg5 : index, %arg6: i32, %arg7: i32, %s: i32): %1 = muli %arg6, %arg7 : i32 %2 = index_cast %arg3 : index to i32 %3 = addi %1, %2 : i32 @@ -228,7 +273,8 @@ func @indexed_generic_op_reshape_producer_fusion(%arg0 : tensor, // CHECK: ^{{.*}}( // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index, %[[ARG3:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index, %[[ARG5:[a-zA-Z0-9]+]]: index, -// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: i32, %[[ARG7:[a-zA-Z0-9]+]]: i32) +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: i32, %[[ARG7:[a-zA-Z0-9]+]]: i32, +// CHECK-SAME: %[[ARG8:[a-zA-Z0-9]+]]: i32) // CHECK: %[[T3:.+]] = affine.apply #[[MAP]](%[[ARG2]], %[[ARG3]]) // CHECK: %[[T4:.+]] = muli %[[ARG6]], %[[ARG7]] // CHECK: %[[T5:.+]] = index_cast %[[T3]] @@ -249,8 +295,9 @@ func @indexed_generic_op_reshape_consumer_fusion(%arg0 : tensor, %0 = linalg.indexed_generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3 : index, %arg4 : index, %arg5: i32, %arg6: i32): // no predecessors + ins(%arg0, %arg1 : tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg3 : index, %arg4 : index, %arg5: i32, %arg6: i32, %s: i32): // no predecessors %1 = muli %arg5, %arg6 : i32 %2 = index_cast %arg3 : index to i32 %3 = addi %1, %2 : i32 @@ -271,7 +318,8 @@ func @indexed_generic_op_reshape_consumer_fusion(%arg0 : tensor, // CHECK: ^{{.*}}( // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index, %[[ARG3:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index, %[[ARG5:[a-zA-Z0-9]+]]: index, -// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: i32, %[[ARG7:[a-zA-Z0-9]+]]: i32) +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: i32, %[[ARG7:[a-zA-Z0-9]+]]: i32, +// CHECK-SAME: %[[ARG8:[a-zA-Z0-9]+]]: i32) // CHECK: %[[T3:.+]] = affine.apply #[[MAP]](%[[ARG3]], %[[ARG4]], %[[ARG5]]) // CHECK: %[[T4:.+]] = muli %[[ARG6]], %[[ARG7]] // CHECK: %[[T5:.+]] = index_cast %[[ARG2]] @@ -283,15 +331,16 @@ func @indexed_generic_op_reshape_consumer_fusion(%arg0 : tensor, // ----- func @reshape_as_consumer_permutation - (%a : tensor<210x6x4xi32>, %b : tensor<210x4xi32>) + (%a : tensor<210x6x4xi32>, %b : tensor<210x4xi32>, %shape : tensor<6x4x210xi32>) -> tensor<2x3x4x5x6x7xi32> { %c = linalg.indexed_generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d2, d1)>], iterator_types = ["parallel", 
"parallel", "parallel"]} - ins(%a, %b : tensor<210x6x4xi32>, tensor<210x4xi32>) { - ^bb0(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i32, %arg4: i32): + ins(%a, %b : tensor<210x6x4xi32>, tensor<210x4xi32>) + outs(%shape : tensor<6x4x210xi32>) { + ^bb0(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i32, %arg4: i32, %s: i32): %1 = addi %arg3, %arg4 : i32 %2 = index_cast %arg0 : index to i32 %3 = addi %1, %2 : i32 @@ -327,36 +376,42 @@ func @reshape_as_consumer_permutation // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-DAG: %[[T1:.+]] = linalg.tensor_reshape %[[ARG1]] // CHECK-SAME: [#[[MAP3]], #[[MAP4]]] -// CHECK: %[[T2:.+]] = linalg.indexed_generic +// CHECK: %[[T2:.+]] = linalg.init_tensor [2, 3, 4, 5, 6, 7] +// CHECK: %[[T3:.+]] = linalg.indexed_generic // CHECK-SAME: indexing_maps = [#[[MAP7]], #[[MAP8]], #[[MAP9]]] -// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor<{{.+}}>, tensor<{{.+}}>) +// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor<5x6x7x2x3x4xi32>, tensor<5x6x7x4xi32>) +// CHECK-SAME: outs(%[[T2]] : tensor<2x3x4x5x6x7xi32>) // CHECK: ^{{.+}}( // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index, %[[ARG3:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index, %[[ARG5:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index, %[[ARG7:[a-zA-Z0-9]+]]: index, -// CHECK-SAME: %[[ARG8:[a-zA-Z0-9]+]]: i32, %[[ARG9:[a-zA-Z0-9]+]]: i32) -// CHECK-DAG: %[[T3:.+]] = affine.apply #[[MAP5]](%[[ARG2]], %[[ARG3]]) -// CHECK-DAG: %[[T4:.+]] = affine.apply #[[MAP6]](%[[ARG4]], %[[ARG5]], %[[ARG6]]) -// CHECK-DAG: %[[T5:.+]] = addi %[[ARG8]], %[[ARG9]] -// CHECK: %[[T6:.+]] = index_cast %[[T3]] -// CHECK: %[[T7:.+]] = addi %[[T5]], %[[T6]] -// CHECK: %[[T8:.+]] = index_cast %[[T4]] -// CHECK: %[[T9:.+]] = addi %[[T7]], %[[T8]] -// CHECK: %[[T10:.+]] = index_cast %[[ARG7]] -// CHECK: %[[T11:.+]] = addi %[[T9]], %[[T10]] +// CHECK-SAME: %[[ARG8:[a-zA-Z0-9]+]]: i32, %[[ARG9:[a-zA-Z0-9]+]]: i32, +// CHECK-SAME: %[[ARG10:[a-zA-Z0-9]+]]: i32) +// CHECK-DAG: %[[T4:.+]] = affine.apply #[[MAP5]](%[[ARG2]], %[[ARG3]]) +// CHECK-DAG: %[[T5:.+]] = affine.apply #[[MAP6]](%[[ARG4]], %[[ARG5]], %[[ARG6]]) +// CHECK-DAG: %[[T6:.+]] = addi %[[ARG8]], %[[ARG9]] +// CHECK: %[[T7:.+]] = index_cast %[[T4]] +// CHECK: %[[T8:.+]] = addi %[[T6]], %[[T7]] +// CHECK: %[[T9:.+]] = index_cast %[[T5]] +// CHECK: %[[T10:.+]] = addi %[[T8]], %[[T9]] +// CHECK: %[[T11:.+]] = index_cast %[[ARG7]] +// CHECK: %[[T12:.+]] = addi %[[T10]], %[[T11]] // ----- -func @reshape_as_producer_projected_permutation - (%arg0 : tensor<33x8x?xi32>) -> tensor<264x?x4xi32> { +func @reshape_as_producer_projected_permutation( + %arg0 : tensor<33x8x?xi32>, %shape : tensor<264x?x4xi32>) -> tensor<264x?x4xi32> +{ %0 = linalg.tensor_reshape %arg0 [affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d2)>] : tensor<33x8x?xi32> into tensor<264x?xi32> %1 = linalg.indexed_generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], - iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<264x?xi32>) { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: i32): // no predecessors + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%0 : tensor<264x?xi32>) + outs(%shape : tensor<264x?x4xi32>) { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: i32, %s: i32): // no predecessors %2 = index_cast %arg1 : index to i32 %3 = addi %arg4, %2 : i32 %4 = index_cast %arg2 : index to i32 @@ -384,7 +439,8 @@ func 
@reshape_as_producer_projected_permutation // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index, -// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: i32) +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: i32, +// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: i32) // CHECK: %[[T0:.+]] = affine.apply #[[MAP2]](%[[ARG1]], %[[ARG2]]) // CHECK: %[[T1:.+]] = index_cast %[[T0]] : index to i32 // CHECK: %[[T2:.+]] = addi %[[ARG5]], %[[T1]] : i32 @@ -409,8 +465,9 @@ func @generic_op_reshape_consumer_fusion_projected(%arg0 : tensor, %0 = linalg.generic { indexing_maps = [#map0, #map0, #map1], iterator_types = ["parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors + ins(%arg0, %arg1 : tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %s: f32): // no predecessors %1 = mulf %arg3, %arg4 : f32 linalg.yield %1 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir index 468ae80a1288b..aff1447a63c7b 100644 --- a/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir +++ b/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir @@ -1,9 +1,5 @@ // RUN: mlir-opt -split-input-file -linalg-fold-reshape-ops-by-linearization %s | FileCheck %s - -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)> -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func @generic_op_reshape_producer_fusion(%arg0 : tensor, %arg1 : tensor) -> @@ -14,37 +10,39 @@ func @generic_op_reshape_producer_fusion(%arg0 : tensor, affine_map<(i, j, k, l) -> (l)>] : tensor into tensor %1 = linalg.generic { - indexing_maps = [#map0, #map0, #map0], - iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors + indexing_maps = [#map0, #map0, #map0], + iterator_types = ["parallel", "parallel", "parallel", "parallel"]} + ins(%0, %arg1 : tensor, tensor) + outs(%0 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors %1 = mulf %arg3, %arg4 : f32 linalg.yield %1 : f32 } -> tensor return %1 : tensor } -// CHECK-LABEL: func @generic_op_reshape_producer_fusion +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK: func @generic_op_reshape_producer_fusion +// CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]] -// CHECK-NOT: linalg.generic - +// CHECK-SAME: ins(%[[ARG0]], %{{.+}} : tensor, tensor) +// CHECK-SAME: outs(%{{.+}} : tensor) // ----- -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)> - #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func @generic_op_reshape_consumer_fusion(%arg0 : tensor, %arg1 : tensor) -> tensor { %0 = linalg.generic { - indexing_maps = [#map0, #map0, #map0], - iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors + indexing_maps = [#map0, #map0, #map0], + iterator_types = ["parallel", "parallel", "parallel", "parallel"]} + ins(%arg0, %arg1 : 
tensor, tensor) + outs(%arg0 : tensor){ + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors %1 = mulf %arg3, %arg4 : f32 linalg.yield %1 : f32 } -> tensor @@ -54,10 +52,21 @@ func @generic_op_reshape_consumer_fusion(%arg0 : tensor, return %1 : tensor } -// CHECK-LABEL: func @generic_op_reshape_consumer_fusion -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP1]]] -// CHECK-NOT: linalg.generic + +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)> +// CHECK: func @generic_op_reshape_consumer_fusion +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C20:.+]] = constant 20 : index +// CHECK: %[[T0:.+]] = dim %[[ARG0]], %[[C0]] +// CHECK: %[[T1:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[T2:.+]] = muli %[[T1]], %[[C20]] +// CHECK: %[[T3:.+]] = linalg.init_tensor [%[[T0]], %[[T2]]] +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP1]]] +// CHECK-SAME: outs(%[[T3]] : tensor) // ----- @@ -69,8 +78,9 @@ func @generic_op_reshape_consumer_nofusion(%arg0 : tensor, %0 = linalg.generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0, %arg1 : tensor, tensor) { - ^bb0(%arg3: f32, %arg4: f32): // no predecessors + ins(%arg0, %arg1 : tensor, tensor) + outs(%arg0 : tensor) { + ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors %1 = mulf %arg3, %arg4 : f32 linalg.yield %1 : f32 } -> tensor @@ -81,14 +91,11 @@ func @generic_op_reshape_consumer_nofusion(%arg0 : tensor, } // CHECK-LABEL: func @generic_op_reshape_consumer_nofusion -// CHECK: linalg.tensor_reshape +// CHECK: %[[T0:.+]] = linalg.generic +// CHECK: linalg.tensor_reshape %[[T0]] // ----- - -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)> -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func @indexed_generic_op_reshape_producer_fusion(%arg0 : tensor) -> tensor { @@ -99,8 +106,9 @@ func @indexed_generic_op_reshape_producer_fusion(%arg0 : tensor) %1 = linalg.indexed_generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"] } - ins(%0 : tensor) { - ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: i32): // no predecessors + ins(%0 : tensor) + outs(%0 : tensor) { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: i32, %arg7 : i32): // no predecessors %2 = index_cast %arg2 : index to i32 %3 = addi %arg6, %2 : i32 linalg.yield %3 : i32 @@ -108,25 +116,24 @@ func @indexed_generic_op_reshape_producer_fusion(%arg0 : tensor) return %1 : tensor } -// CHECK-LABEL: func @indexed_generic_op_reshape_producer_fusion -// CHECK-NOT: linalg.tensor_reshape -// CHECK: linalg.indexed_generic -// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] -// CHECK-NOT: linalg.tensor_reshape +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK: func @indexed_generic_op_reshape_producer_fusion +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: linalg.indexed_generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-SAME: ins(%[[ARG0]] : tensor) 
// ----- -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)> - #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func @indexed_generic_op_reshape_consumer_fusion(%arg0 : tensor) -> tensor { %0 = linalg.indexed_generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"] } - ins(%arg0 : tensor) { - ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: i32): // no predecessors + ins(%arg0 : tensor) outs(%arg0 : tensor) { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: i32, %arg7: i32): // no predecessors %2 = index_cast %arg2 : index to i32 %3 = addi %arg6, %2 : i32 linalg.yield %3 : i32 @@ -137,105 +144,124 @@ func @indexed_generic_op_reshape_consumer_fusion(%arg0 : tensor) return %1 : tensor } +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)> // CHECK-LABEL: func @indexed_generic_op_reshape_consumer_fusion -// CHECK-NOT: linalg.tensor_reshape -// CHECK: linalg.indexed_generic -// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] -// CHECK-NOT: linalg.tensor_reshape +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C20:.+]] = constant 20 : index +// CHECK: %[[T0:.+]] = dim %[[ARG0]], %[[C0]] +// CHECK: %[[T1:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[T2:.+]] = muli %[[T1]], %[[C20]] +// CHECK: %[[T3:.+]] = linalg.init_tensor [%[[T0]], %[[T2]]] +// CHECK: linalg.indexed_generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-SAME: outs(%[[T3]] : tensor) +// CHECK-NOT: linalg.tensor_reshape // ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1 + d2 * 7)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> - #map0 = affine_map<(d0, d1, d2) -> (d0)> #map1 = affine_map<(d0, d1, d2) -> (d1, d2)> #map2 = affine_map<(d0, d1, d2) -> (d0, d2, d1)> #map3 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func @generic_op_021_permultation_reshape_producer_fusion(%arg0 : tensor<3x35xf32>) -> tensor<3x7x5xf32> { %0 = linalg.tensor_reshape %arg0 [#map0, #map1] : tensor<3x35xf32> into tensor<3x5x7xf32> - %1 = linalg.generic {indexing_maps = [#map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<3x5x7xf32>) { - ^bb0(%arg2: f32): // no predecessors + %1 = linalg.init_tensor [3, 7, 5] : tensor<3x7x5xf32> + %2 = linalg.generic + {indexing_maps = [#map2, #map3], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%0 : tensor<3x5x7xf32>) outs(%1 : tensor<3x7x5xf32>) { + ^bb0(%arg2: f32, %arg3 : f32): // no predecessors linalg.yield %arg2 : f32 } -> tensor<3x7x5xf32> - return %1 : tensor<3x7x5xf32> + return %2 : tensor<3x7x5xf32> } +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1 + d2 * 7)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-LABEL: func @generic_op_021_permultation_reshape_producer_fusion -// CHECK-NOT: linalg.tensor_reshape -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] -// CHECK-NOT: linalg.tensor_reshape +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] // ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> 
(d2, d0 * 7 + d1)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> - #map0 = affine_map<(d0, d1, d2) -> (d0)> #map1 = affine_map<(d0, d1, d2) -> (d1, d2)> #map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)> #map3 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func @generic_op_120_permultation_reshape_producer_fusion(%arg0 : tensor<3x35xf32>) -> tensor<5x7x3xf32> { %0 = linalg.tensor_reshape %arg0 [#map0, #map1] : tensor<3x35xf32> into tensor<3x5x7xf32> - %1 = linalg.generic {indexing_maps = [#map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<3x5x7xf32>) { - ^bb0(%arg2: f32): // no predecessors + %1 = linalg.init_tensor [5, 7, 3] : tensor<5x7x3xf32> + %2 = linalg.generic + {indexing_maps = [#map2, #map3], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%0 : tensor<3x5x7xf32>) outs(%1 : tensor<5x7x3xf32>) { + ^bb0(%arg2: f32, %arg3: f32): // no predecessors linalg.yield %arg2 : f32 } -> tensor<5x7x3xf32> - return %1 : tensor<5x7x3xf32> + return %2 : tensor<5x7x3xf32> } -// CHECK-LABEL: func @generic_op_120_permultation_reshape_producer_fusion -// CHECK-NOT: linalg.tensor_reshape -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] -// CHECK-NOT: linalg.tensor_reshape +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d2, d0 * 7 + d1)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK: func @generic_op_120_permultation_reshape_producer_fusion +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] // ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> - #map0 = affine_map<(d0, d1, d2) -> (d0)> #map1 = affine_map<(d0, d1, d2) -> (d1, d2)> #map2 = affine_map<(d0, d1, d2) -> (d1, d0, d2)> #map3 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func @generic_op_102_permultation_reshape_producer_fusion(%arg0 : tensor<3x35xf32>) -> tensor<5x3x7xf32> { %0 = linalg.tensor_reshape %arg0 [#map0, #map1] : tensor<3x35xf32> into tensor<3x5x7xf32> - %1 = linalg.generic {indexing_maps = [#map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<3x5x7xf32>) { - ^bb0(%arg2: f32): // no predecessors + %1 = linalg.init_tensor [5, 3, 7] : tensor<5x3x7xf32> + %2 = linalg.generic + {indexing_maps = [#map2, #map3], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%0 : tensor<3x5x7xf32>) outs(%1 : tensor<5x3x7xf32>) { + ^bb0(%arg2: f32, %arg3: f32): // no predecessors linalg.yield %arg2 : f32 } -> tensor<5x3x7xf32> - return %1 : tensor<5x3x7xf32> + return %2 : tensor<5x3x7xf32> } -// CHECK-LABEL: func @generic_op_102_permultation_reshape_producer_fusion -// CHECK-NOT: linalg.tensor_reshape -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] -// CHECK-NOT: linalg.tensor_reshape - -// ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)> +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK: func @generic_op_102_permultation_reshape_producer_fusion +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// ----- #map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> 
(d1, d0, d2)> #map2 = affine_map<(d0, d1, d2) -> (d0)> #map3 = affine_map<(d0, d1, d2) -> (d1, d2)> func @generic_op_102_permultation_reshape_consumer_fusion(%arg0 : tensor<3x5x7xf32>) -> tensor<5x21xf32> { - %0 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor<3x5x7xf32>) { - ^bb0(%arg2: f32): // no predecessors + %0 = linalg.init_tensor [5, 3, 7] : tensor<5x3x7xf32> + %1 = linalg.generic + {indexing_maps = [#map0, #map1], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%arg0 : tensor<3x5x7xf32>) outs(%0 : tensor<5x3x7xf32>) { + ^bb0(%arg2: f32, %arg3 : f32): // no predecessors linalg.yield %arg2 : f32 } -> tensor<5x3x7xf32> - %1 = linalg.tensor_reshape %0 [#map2, #map3] : tensor<5x3x7xf32> into tensor<5x21xf32> - return %1 : tensor<5x21xf32> + %2 = linalg.tensor_reshape %1 [#map2, #map3] : tensor<5x3x7xf32> into tensor<5x21xf32> + return %2 : tensor<5x21xf32> } -// CHECK-LABEL: func @generic_op_102_permultation_reshape_consumer_fusion -// CHECK-NOT: linalg.tensor_reshape -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] -// CHECK-NOT: linalg.tensor_reshape + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d0 * 7 + d2)> +// CHECK: func @generic_op_102_permultation_reshape_consumer_fusion +// CHECK-NOT: linalg.tensor_reshape +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index be785ceb70d6d..c4eb8f8eac679 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -300,7 +300,7 @@ func @pooling_sum(%arg0: memref, func @generic(%arg0: memref, offset: ?, strides: [?, 1]>, %arg1: memref) { linalg.generic #trait - ins(%arg0 : memref, offset: ?, strides: [?, 1]>) + ins(%arg0 : memref, offset: ?, strides: [?, 1]>) outs(%arg1 : memref) attrs = {foo = 1} { ^bb(%0: vector<3x4xi4>, %1: f32) : @@ -314,14 +314,14 @@ func @generic(%arg0: memref, offset: ?, strides: [?, 1]>, // CHECK-SAME: indexing_maps = [#{{[0-9a-z]*}}, #{{[0-9a-z]*}}], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_1"} -// CHECK-SAME: ins({{.*}} : memref, #[[$strided2D]]>) +// CHECK-SAME: ins({{.*}} : memref, #[[$strided2D]]>) // CHECK-SAME: outs({{.*}} : memref) // CHECK-SAME: {foo = 1 : i64} func @generic_with_tensor_input(%arg0: tensor>, %arg1: memref) { linalg.generic #trait - ins(%arg0 : tensor>) + ins(%arg0 : tensor>) outs(%arg1 : memref) attrs = {foo = 1} { ^bb(%0: vector<3x4xi4>, %1: f32) : @@ -358,14 +358,14 @@ func @generic_without_inputs(%arg0 : memref) { // ----- -#accesses = [ +#accesses2 = [ affine_map<(i, j, k) -> (j, i)>, affine_map<(i, j, k) -> (i, k, i + j)>, affine_map<(i, j, k) -> (i, k, i + j)> ] #trait2 = { - indexing_maps = #accesses, + indexing_maps = #accesses2, iterator_types = ["parallel", "parallel", "parallel"], library_call = "some_external_function_name_1" } @@ -374,9 +374,10 @@ func @generic_with_tensor_input_and_output( %arg0: tensor>, %arg1: tensor) -> (tensor) { %0 = linalg.generic #trait2 - ins(%arg0, %arg1 : tensor>, tensor) + ins(%arg0, %arg1 : tensor>, tensor) + outs(%arg1 : tensor) attrs = {foo = 1} { - ^bb(%0: vector<3x4xi4>, %1: f32) : + ^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) : %f0 = constant 0.0 : f32 linalg.yield %f0 : f32 } -> tensor 
@@ -386,21 +387,22 @@ func @generic_with_tensor_input_and_output( // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_1"} -// CHECK-SAME: ins({{.*}} : tensor>, tensor) +// CHECK-SAME: ins({{.*}} : tensor>, tensor) +// CHECK-SAME: outs({{.*}} : tensor) // CHECK-SAME: {foo = 1 : i64} // CHECK: -> tensor // CHECK: return {{.*}} : tensor // ----- -#accesses = [ +#accesses3 = [ affine_map<(i, j, k) -> (j, i)>, affine_map<(i, j, k) -> (i, k, i + j)>, affine_map<(i, j, k) -> (i, k, i + j)> ] -#trait2 = { - indexing_maps = #accesses, +#trait3 = { + indexing_maps = #accesses3, iterator_types = ["parallel", "parallel", "parallel"], library_call = "some_external_function_name_1" } @@ -408,10 +410,11 @@ func @generic_with_tensor_input_and_output( func @indexed_generic_with_tensor_input_and_output( %arg0: tensor>, %arg1: tensor) -> (tensor) { - %0 = linalg.indexed_generic #trait2 - ins(%arg0, %arg1 : tensor>, tensor) + %0 = linalg.indexed_generic #trait3 + ins(%arg0, %arg1 : tensor>, tensor) + outs(%arg1 : tensor) attrs = {foo = 1} { - ^bb(%i: index, %j: index, %k: index, %0: vector<3x4xi4>, %1: f32) : + ^bb(%i: index, %j: index, %k: index, %0: vector<3x4xi4>, %1: f32, %2: f32) : %f0 = constant 0.0 : f32 linalg.yield %f0 : f32 } -> tensor @@ -421,7 +424,8 @@ func @indexed_generic_with_tensor_input_and_output( // CHECK: linalg.indexed_generic { // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_1"} -// CHECK-SAME: ins({{.*}} : tensor>, tensor) +// CHECK-SAME: ins({{.*}} : tensor>, tensor) +// CHECK-SAME: outs({{.*}} : tensor) // CHECK-SAME: {foo = 1 : i64} // CHECK: -> tensor // CHECK: return {{.*}} : tensor @@ -439,21 +443,23 @@ func @indexed_generic_with_tensor_input_and_output( library_call = "some_broadcast_external_fn" } -func @generic_op_zero_rank(%arg0: tensor) -> (tensor<3x4xf32>) +func @generic_op_zero_rank(%arg0: tensor, %arg1 : tensor<3x4xf32>) -> (tensor<3x4xf32>) { %0 = linalg.generic #trait_broadcast - ins(%arg0 : tensor) { - ^bb(%a: f32) : + ins(%arg0 : tensor) + outs(%arg1 : tensor<3x4xf32>) { + ^bb(%a: f32, %b: f32) : linalg.yield %a : f32 } -> tensor<3x4xf32> return %0 : tensor<3x4xf32> } -func @indexed_generic_op_zero_rank(%arg0: tensor) -> (tensor<3x4xf32>) +func @indexed_generic_op_zero_rank(%arg0: tensor, %arg1 : tensor<3x4xf32>) -> (tensor<3x4xf32>) { %0 = linalg.indexed_generic #trait_broadcast - ins(%arg0 : tensor) { - ^bb(%i: index, %j: index, %a: f32) : + ins(%arg0 : tensor) + outs(%arg1 : tensor<3x4xf32>) { + ^bb(%i: index, %j: index, %a: f32, %b: f32) : linalg.yield %a : f32 } -> tensor<3x4xf32> return %0 : tensor<3x4xf32> @@ -478,7 +484,7 @@ func @indexed_generic_op_zero_rank(%arg0: tensor) -> (tensor<3x4xf32>) func @generic_region(%arg0: memref, offset: ?, strides: [?, 1]>, %arg1: memref) { linalg.generic #trait3 - ins(%arg0 : memref, offset: ?, strides: [?, 1]>) + ins(%arg0 : memref, offset: ?, strides: [?, 1]>) outs(%arg1 : memref) attrs = {foo = 1} { ^bb(%a: vector<3x4xi4>, %b: f32) : @@ -491,7 +497,7 @@ func @generic_region(%arg0: memref, offset: ?, strides: [?, 1 // CHECK-SAME: indexing_maps = [#{{[0-9a-z]*}}, #{{[0-9a-z]*}}], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_2" -// CHECK-SAME: ins({{.*}} : memref, #[[$strided2D]]>) +// 
CHECK-SAME: ins({{.*}} : memref, #[[$strided2D]]>) // CHECK-SAME: outs({{.*}} : memref) // CHECK-SAME: attrs = {foo = 1 : i64} { // CHECK: ^{{.*}}(%{{.*}}: vector<3x4xi4>, %{{.*}}: f32): @@ -500,7 +506,7 @@ func @generic_region(%arg0: memref, offset: ?, strides: [?, 1 func @indexed_generic(%arg0: memref, offset: ?, strides: [?, 1]>, %arg1: memref) { linalg.indexed_generic #trait3 - ins(%arg0 : memref, offset: ?, strides: [?, 1]>) + ins(%arg0 : memref, offset: ?, strides: [?, 1]>) outs(%arg1 : memref) attrs = {foo = 1} { ^bb(%i: index, %j: index, %k: index, %a: vector<3x4xi4>, %b: f32) : @@ -564,8 +570,8 @@ func @reshape_static(%arg0: memref<3x4x5xf32>, %arg1: tensor<3x4x5xf32>, %arg2: affine_map<(i, j, k, l, m) -> (l, m)>] : tensor<3x4x5xf32> into tensor<1x3x4x1x5xf32> %rt0 = linalg.tensor_reshape %t0 [affine_map<(i, j, k, l, m) -> (i, j)>, - affine_map<(i, j, k, l, m) -> (k)>, - affine_map<(i, j, k, l, m) -> (l, m)>] : + affine_map<(i, j, k, l, m) -> (k)>, + affine_map<(i, j, k, l, m) -> (l, m)>] : tensor<1x3x4x1x5xf32> into tensor<3x4x5xf32> %t1 = linalg.tensor_reshape %arg2 [affine_map<(i, j, k, l, m) -> (i, j)>, affine_map<(i, j, k, l, m) -> (k)>, @@ -660,11 +666,13 @@ func @named_ops(%a3: memref, %b3: memref, %c3: memref) linalg.batch_matmul ins(%ta3, %tb3: tensor, tensor) outs(%c3: memref) - %res1 = linalg.batch_matmul ins(%ta3, %tb3: tensor, tensor) - init(%tc3: tensor) + %res1 = linalg.batch_matmul + ins(%ta3, %tb3: tensor, tensor) + outs(%tc3: tensor) -> tensor - %res2 = linalg.batch_matmul ins(%ta3, %b3: tensor, memref) - init(%tc3: tensor) + %res2 = linalg.batch_matmul + ins(%ta3, %b3: tensor, memref) + outs(%tc3: tensor) -> tensor return %res1, %res2 : tensor, tensor } diff --git a/mlir/test/Dialect/Linalg/sparse_1d.mlir b/mlir/test/Dialect/Linalg/sparse_1d.mlir index 4c14b2e89279d..4baf1d1c14032 100644 --- a/mlir/test/Dialect/Linalg/sparse_1d.mlir +++ b/mlir/test/Dialect/Linalg/sparse_1d.mlir @@ -32,8 +32,9 @@ // CHECK: } func @add_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { %0 = linalg.generic #trait_d - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s : f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -58,8 +59,9 @@ func @add_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { // CHECK: } func @mul_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { %0 = linalg.generic #trait_d - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s : f32): %0 = mulf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -124,8 +126,9 @@ func @mul_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { // CHECK: } func @add_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { %0 = linalg.generic #trait_s - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s : f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -159,8 +162,9 @@ func @add_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { // CHECK: } func @repeated_add_s(%arga: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_s - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s : f32): %0 = addf %a, %a : f32 // same tensor %1 = addf %a, %a : f32 // should yield %2 = addf %0, %1 : f32 // one guard @@ -192,8 +196,9 @@ func @repeated_add_s(%arga: 
tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @mul_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { %0 = linalg.generic #trait_s - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s : f32): %0 = mulf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -235,8 +240,9 @@ func @mul_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { // CHECK: } func @add_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_dd - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -263,8 +269,9 @@ func @add_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @mul_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_dd - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -335,8 +342,9 @@ func @mul_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_ds - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -368,8 +376,9 @@ func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_ds - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -440,8 +449,9 @@ func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @add_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_sd - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -473,8 +483,9 @@ func @add_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @mul_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_sd - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -569,8 +580,9 @@ func @mul_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @add_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_ss - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, 
%argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -628,8 +640,9 @@ func @add_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { // CHECK: } func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait_ss - ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) + outs(%arga : tensor<32xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -730,8 +743,9 @@ func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> { func @two_way_inv(%arga: tensor<16xf32>, %argb: tensor<16xf32>, %argc: f32) -> tensor<16xf32> { %0 = linalg.generic #trait_two_way_inv - ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>) { - ^bb(%a : f32, %b : f32): + ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>) + outs(%argb : tensor<16xf32>) { + ^bb(%a : f32, %b : f32, %c : f32): %0 = mulf %a, %argc : f32 %1 = mulf %b, %argc : f32 %2 = addf %0, %1 : f32 @@ -819,8 +833,9 @@ func @two_way_inv_alt(%arga: tensor<16xf32>, %argb: tensor<16xf32>, %argc: f32) -> tensor<16xf32> { // Same kernel, but now expressed as "x(i) = (a(i) + b(i)) * c". %0 = linalg.generic #trait_two_way_inv - ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>) { - ^bb(%a : f32, %b : f32): + ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>) + outs(%argb : tensor<16xf32>) { + ^bb(%a : f32, %b : f32, %c : f32): %0 = addf %a, %b : f32 %1 = mulf %0, %argc : f32 linalg.yield %1: f32 @@ -866,7 +881,7 @@ func @two_way_inv_alt(%arga: tensor<16xf32>, func @sum_reduction(%arga: tensor, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga : tensor) - init(%argx : tensor) { + outs(%argx : tensor) { ^bb(%a : f32, %x : f32): %0 = addf %x, %a : f32 linalg.yield %0: f32 @@ -975,7 +990,7 @@ func @sum_reduction_ss(%arga: tensor<16xf32>, // as two separate reductions kernels. %0 = linalg.generic #trait_sum_reduction_ss ins(%arga, %argb: tensor<16xf32>, tensor<16xf32>) - init(%argx : tensor) { + outs(%argx : tensor) { ^bb(%a : f32, %b : f32, %x : f32): %0 = addf %a, %b : f32 %1 = addf %x, %0 : f32 @@ -1094,7 +1109,7 @@ func @sum_reduction_inv(%arga: tensor<16xf32>, // as two separate reductions kernels. 
%0 = linalg.generic #trait_sum_reduction_inv_ss ins(%arga, %argb, %argc : tensor<16xf32>, tensor, tensor<16xf32>) - init(%argx : tensor) { + outs(%argx : tensor) { ^bb(%a : f32, %b : f32, %c : f32, %x : f32): %0 = mulf %a, %b : f32 %1 = addf %0, %c : f32 diff --git a/mlir/test/Dialect/Linalg/sparse_2d.mlir b/mlir/test/Dialect/Linalg/sparse_2d.mlir index dea7444cadae2..6612a723f23dd 100644 --- a/mlir/test/Dialect/Linalg/sparse_2d.mlir +++ b/mlir/test/Dialect/Linalg/sparse_2d.mlir @@ -39,8 +39,9 @@ // CHECK: } func @add_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_dd - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga: tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -70,8 +71,9 @@ func @add_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16 // CHECK: } func @mul_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_dd - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -146,8 +148,9 @@ func @mul_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16 // CHECK: } func @add_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ds - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -183,8 +186,9 @@ func @add_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16 // CHECK: } func @mul_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ds - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -264,8 +268,9 @@ func @mul_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16 // CHECK: } func @add_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_sd - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -302,8 +307,9 @@ func @add_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16 // CHECK: } func @mul_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_sd - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -409,8 +415,9 @@ func @mul_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) 
-> tensor<32x16 // CHECK: } func @add_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ss - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -450,8 +457,9 @@ func @add_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16 // CHECK: } func @mul_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ss - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -627,8 +635,9 @@ func @mul_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16 // CHECK: } func @add_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ss_ss - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -721,8 +730,9 @@ func @add_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32 // CHECK: } func @mul_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ss_ss - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -898,8 +908,9 @@ func @mul_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32 // CHECK: } func @add_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ss_ss - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -992,8 +1003,9 @@ func @add_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32 // CHECK: } func @mul_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait_ss_ss - ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) + outs(%arga : tensor<32x16xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -1048,8 +1060,8 @@ func @mul_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32 // CHECK: } func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec - ins(%argA, %argb : tensor<16x32xf32>, tensor<32xf32>) - init(%argx : tensor<16xf32>) { + ins(%argA, %argb : tensor<16x32xf32>, tensor<32xf32>) + outs(%argx : tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = mulf %A, %b : f32 %1 = addf %0, %x : f32 @@ -1099,8 +1111,8 @@ func 
@matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf // CHECK: } func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction - ins(%arga : tensor<10x20xf32>) - init(%argx : tensor) { + ins(%arga : tensor<10x20xf32>) + outs(%argx : tensor) { ^bb(%a : f32, %x : f32): %0 = addf %x, %a : f32 linalg.yield %0: f32 @@ -1150,8 +1162,9 @@ func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor) -> tensor func @scale(%arga: tensor) -> tensor { %0 = constant 2.0 : f64 %1 = linalg.generic #trait_scale - ins(%arga: tensor) { - ^bb(%a: f64): + ins(%arga: tensor) + outs(%arga: tensor) { + ^bb(%a: f64, %s: f64): %2 = mulf %a, %0 : f64 linalg.yield %2 : f64 } -> tensor @@ -1224,10 +1237,10 @@ func @scale(%arga: tensor) -> tensor { func @sampled_dense_dense(%args: tensor, %arga: tensor, %argb: tensor, - %argx: tensor) -> tensor { + %argx: tensor) -> tensor { %0 = linalg.generic #trait_sampled_dense_dense - ins(%args, %arga, %argb : tensor, tensor, tensor) - init(%argx : tensor) { + ins(%args, %arga, %argb : tensor, tensor, tensor) + outs(%argx : tensor) { ^bb(%s : f32, %a : f32, %b : f32, %x : f32): %0 = mulf %a, %b : f32 %1 = mulf %s, %0 : f32 @@ -1457,7 +1470,7 @@ func @sum_kernel_with_inv(%arga: tensor, tensor, tensor, tensor) - init(%argx : tensor) { + outs(%argx : tensor) { ^bb(%a : f32, %b : f32, %c : f32, %d : f32, %e : f32, %x : f32): %0 = mulf %a, %b : f32 %1 = mulf %0, %d : f32 diff --git a/mlir/test/Dialect/Linalg/sparse_3d.mlir b/mlir/test/Dialect/Linalg/sparse_3d.mlir index 41818bb982b6b..a32770e635e45 100644 --- a/mlir/test/Dialect/Linalg/sparse_3d.mlir +++ b/mlir/test/Dialect/Linalg/sparse_3d.mlir @@ -42,8 +42,9 @@ // CHECK: } func @add_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_ddd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s: f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -76,8 +77,9 @@ func @add_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_ddd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -157,8 +159,9 @@ func @mul_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @add_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_dds - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -199,8 +202,9 @@ func @add_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_dds - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + 
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -284,8 +288,9 @@ func @mul_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @add_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_dsd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -326,8 +331,9 @@ func @add_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_dsd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -437,8 +443,9 @@ func @mul_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @add_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_dss - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -482,8 +489,9 @@ func @add_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_dss - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -572,8 +580,9 @@ func @mul_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @add_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_sdd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -615,8 +624,9 @@ func @add_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_sdd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -731,8 +741,9 @@ func @mul_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @add_sds(%arga: 
tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_sds - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -777,8 +788,9 @@ func @add_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_sds - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -897,8 +909,9 @@ func @mul_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @add_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_ssd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -943,8 +956,9 @@ func @add_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_ssd - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -1089,8 +1103,9 @@ func @mul_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @add_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_sss - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = addf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -1138,8 +1153,9 @@ func @add_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor< // CHECK: } func @mul_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait_sss - ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) { - ^bb(%a: f32, %b: f32): + ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) + outs(%arga : tensor<32x16x8xf32>) { + ^bb(%a: f32, %b: f32, %s : f32): %0 = mulf %a, %b : f32 linalg.yield %0 : f32 } -> tensor<32x16x8xf32> @@ -1213,8 +1229,8 @@ func @kernel_3d(%arga: tensor, %argc: tensor, %argd: tensor) -> tensor { %0 = linalg.generic #trait_kernel_3d - ins(%argb, %argc, %argd : tensor, tensor, tensor) - init(%arga : tensor) { + ins(%argb, %argc, %argd : tensor, tensor, tensor) + outs(%arga : tensor) { ^bb(%b: f32, %c: f32, %d : f32, %a : f32): %0 = mulf %b, %c : f32 %1 = mulf %0, %d : f32 @@ -1275,8 +1291,8 @@ func @kernel_3d(%arga: tensor, // CHECK: } func @sum_reduction(%arga: 
tensor<10x20x30xf32>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction - ins(%arga : tensor<10x20x30xf32>) - init(%argx : tensor) { + ins(%arga : tensor<10x20x30xf32>) + outs(%argx : tensor) { ^bb(%a : f32, %x : f32): %0 = addf %x, %a : f32 linalg.yield %0: f32 @@ -1334,7 +1350,7 @@ func @sum_reduction_inv(%arga: tensor, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction_inv ins(%arga, %argb : tensor, tensor) - init(%argx : tensor) { + outs(%argx : tensor) { ^bb(%a : f32, %b : f32, %x : f32): %0 = mulf %a, %b : f32 %1 = addf %x, %0 : f32 @@ -1363,7 +1379,8 @@ func @sum_reduction_inv(%arga: tensor, // CHECK-LABEL: func @invariants( // CHECK-SAME: %[[VAL_0:.*]]: tensor<10xf32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<20xf32>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor<30xf32>) -> tensor<10x20x30xf32> { +// CHECK-SAME: %[[VAL_2:.*]]: tensor<30xf32>, +// CHECK-SAME: %[[SHAPE:.*]]: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> { // CHECK: %[[VAL_3:.*]] = constant 10 : index // CHECK: %[[VAL_4:.*]] = constant 20 : index // CHECK: %[[VAL_5:.*]] = constant 30 : index @@ -1390,10 +1407,12 @@ func @sum_reduction_inv(%arga: tensor, // CHECK: } func @invariants(%arga: tensor<10xf32>, %argb: tensor<20xf32>, - %argc: tensor<30xf32>) -> tensor<10x20x30xf32> { + %argc: tensor<30xf32>, + %shape : tensor<10x20x30xf32>) -> tensor<10x20x30xf32> { %0 = linalg.generic #trait_invariants - ins(%arga, %argb, %argc : tensor<10xf32>, tensor<20xf32>, tensor<30xf32>) { - ^bb(%a : f32, %b : f32, %c : f32): + ins(%arga, %argb, %argc : tensor<10xf32>, tensor<20xf32>, tensor<30xf32>) + outs(%shape : tensor<10x20x30xf32>) { + ^bb(%a : f32, %b : f32, %c : f32, %s : f32): %0 = mulf %a, %b : f32 %1 = mulf %0, %c : f32 linalg.yield %1: f32 diff --git a/mlir/test/Dialect/Linalg/sparse_invalid.mlir b/mlir/test/Dialect/Linalg/sparse_invalid.mlir index a75ec361a7a18..bb64e80785fa6 100644 --- a/mlir/test/Dialect/Linalg/sparse_invalid.mlir +++ b/mlir/test/Dialect/Linalg/sparse_invalid.mlir @@ -12,11 +12,14 @@ iterator_types = ["parallel"] } -func @invalid_memref(%arga: memref<32xf32>, %argb: f32) -> tensor<32xf32> { +func @invalid_memref(%arga: memref<32xf32>, %argb: f32, %shape: tensor<32xf32>) + -> tensor<32xf32> +{ // expected-error@+1 {{'linalg.generic' op expected sparse annotations on tensors only}} %0 = linalg.generic #trait_memref - ins(%arga: memref<32xf32>) { - ^bb(%a: f32): + ins(%arga: memref<32xf32>) + outs(%shape: tensor<32xf32>) { + ^bb(%a: f32, %s: f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -25,79 +28,6 @@ func @invalid_memref(%arga: memref<32xf32>, %argb: f32) -> tensor<32xf32> { // ----- -#trait_two_out = { - indexing_maps = [ - affine_map<(i) -> (i)>, // a - affine_map<(i) -> (i)>, // x (out) - affine_map<(i) -> (i)> // y (out) - ], - sparse = [ - [ "S" ], // a - [ "D" ], // x - [ "D" ] // y - ], - iterator_types = ["parallel"] -} - -func @invalid_two_out(%arga: tensor<32xf32>) -> tensor<32xf32> { - // expected-error@+1 {{'linalg.generic' op expected single output tensor}} - %0, %1 = linalg.generic #trait_two_out - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): - %0 = addf %a, %a : f32 - linalg.yield %a, %0 : f32, f32 - } -> tensor<32xf32>, tensor<32xf32> - return %1 : tensor<32xf32> -} - -// ----- - -#trait_two_blocks = { - indexing_maps = [ - affine_map<(i) -> (i)>, // a - affine_map<(i) -> (i)> // x (out) - ], - sparse = [ - [ "S" ], // a - [ "D" ] // x - ], - iterator_types = ["parallel"] -} - -func @invalid_two_blocks(%arga: tensor<32xf32>) -> 
tensor<32xf32> { - // expected-error@+1 {{'linalg.generic' op expects region #0 to have 0 or 1 blocks}} - %0 = linalg.generic #trait_two_blocks - ins(%arga: tensor<32xf32>) { - ^bb1(%a: f32): - %0 = addf %a, %a : f32 - ^bb2: - linalg.yield %0 : f32 - } -> tensor<32xf32> - return %0 : tensor<32xf32> -} - -// ----- - -#trait_no_block = { - indexing_maps = [ - affine_map<(i) -> (i)> // a - ], - sparse = [ - [ "S" ] // a - ], - iterator_types = ["parallel"] -} - -func @invalid_no_block(%arga: tensor<32xf32>) { - // expected-error@+1 {{'linalg.generic' op expected region with 1 block}} - linalg.generic #trait_no_block - ins(%arga: tensor<32xf32>) { - } - return -} - -// ----- - #trait_too_many = { indexing_maps = [ affine_map<(i) -> (i)>, // a @@ -114,8 +44,9 @@ func @invalid_no_block(%arga: tensor<32xf32>) { func @invalid_too_many(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { // expected-error@+1 {{'linalg.generic' op expected one sparse annotation for each tensor}} %0 = linalg.generic #trait_too_many - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s: f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -136,8 +67,9 @@ func @invalid_too_many(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { func @invalid_no_array(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { // expected-error@+1 {{'linalg.generic' op expected sparse annotation array for tensor 0}} %0 = linalg.generic #trait_no_array - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s: f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -161,8 +93,9 @@ func @invalid_no_array(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { func @invalid_wrong_rank(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { // expected-error@+1 {{'linalg.generic' op expected sparse annotation with rank 1 for tensor 1}} %0 = linalg.generic #trait_wrong_rank - ins(%arga: tensor<32xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32xf32>) + outs(%arga: tensor<32xf32>) { + ^bb(%a: f32, %s: f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32xf32> @@ -186,8 +119,9 @@ func @invalid_wrong_rank(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> { func @invalid_no_string(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> { // expected-error@+1 {{'linalg.generic' op expected sparse annotation at position 1 for tensor 0}} %0 = linalg.generic #trait_no_string - ins(%arga: tensor<32x16xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32x16xf32>) + outs(%arga: tensor<32x16xf32>) { + ^bb(%a: f32, %s: f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -211,8 +145,9 @@ func @invalid_no_string(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf3 func @invalid_wrong_symbol(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> { // expected-error@+1 {{'linalg.generic' op expected sparse annotation at position 1 for tensor 1}} %0 = linalg.generic #trait_wrong_symbol - ins(%arga: tensor<32x16xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32x16xf32>) + outs(%arga: tensor<32x16xf32>) { + ^bb(%a: f32, %s: f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> @@ -236,8 +171,9 @@ func @invalid_wrong_symbol(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16 func @invalid_no_sparse_output(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> { // expected-error@+1 {{'linalg.generic' op 
sparse output tensors not supported (yet)}} %0 = linalg.generic #trait_no_sparse_output - ins(%arga: tensor<32x16xf32>) { - ^bb(%a: f32): + ins(%arga: tensor<32x16xf32>) + outs(%arga: tensor<32x16xf32>) { + ^bb(%a: f32, %s: f32): %0 = addf %a, %argb : f32 linalg.yield %0 : f32 } -> tensor<32x16xf32> diff --git a/mlir/test/Dialect/Linalg/sparse_parallel.mlir b/mlir/test/Dialect/Linalg/sparse_parallel.mlir index a75406fbab690..3d3d51ae03274 100644 --- a/mlir/test/Dialect/Linalg/sparse_parallel.mlir +++ b/mlir/test/Dialect/Linalg/sparse_parallel.mlir @@ -50,8 +50,9 @@ // func @scale_dd(%scale: f32, %arga: tensor) -> tensor { %0 = linalg.generic #trait_dd - ins(%arga: tensor) { - ^bb(%a: f32): + ins(%arga: tensor) + outs(%arga: tensor) { + ^bb(%a: f32, %s: f32): %0 = mulf %a, %scale : f32 linalg.yield %0 : f32 } -> tensor @@ -99,8 +100,9 @@ func @scale_dd(%scale: f32, %arga: tensor) -> tensor { // func @scale_ss(%scale: f32, %arga: tensor) -> tensor { %0 = linalg.generic #trait_ss - ins(%arga: tensor) { - ^bb(%a: f32): + ins(%arga: tensor) + outs(%arga: tensor) { + ^bb(%a: f32, %s: f32): %0 = mulf %a, %scale : f32 linalg.yield %0 : f32 } -> tensor @@ -151,7 +153,7 @@ func @scale_ss(%scale: f32, %arga: tensor) -> tensor { func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%argA, %argb : tensor<16x32xf32>, tensor<32xf32>) - init(%argx : tensor<16xf32>) { + outs(%argx : tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = mulf %A, %b : f32 %1 = addf %0, %x : f32 diff --git a/mlir/test/Dialect/Linalg/sparse_storage.mlir b/mlir/test/Dialect/Linalg/sparse_storage.mlir index c63bdb1e413d3..69b8e1903d690 100644 --- a/mlir/test/Dialect/Linalg/sparse_storage.mlir +++ b/mlir/test/Dialect/Linalg/sparse_storage.mlir @@ -88,8 +88,9 @@ func @mul_dd(%arga: tensor<32xf64>, %argb: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_mul_1d - ins(%arga, %argb: tensor<32xf64>, tensor<32xf64>) { - ^bb(%a: f64, %b: f64): + ins(%arga, %argb: tensor<32xf64>, tensor<32xf64>) + outs(%arga : tensor<32xf64>) { + ^bb(%a: f64, %b: f64, %s: f64): %0 = mulf %a, %b : f64 linalg.yield %0 : f64 } -> tensor<32xf64> diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir index 2a6a7ba7b7e34..fcecf896ac5dc 100644 --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir @@ -198,14 +198,14 @@ func @matmul_tensors( // CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor to tensor // CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor to tensor // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) -// CHECK-SAME: init(%[[sTC]] : tensor) -> tensor +// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor // CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor // CHECK: scf.yield %[[TD]] : tensor // CHECK: scf.yield %[[TD2]] : tensor // CHECK: scf.yield %[[TD1]] : tensor %0 = linalg.matmul {__internal_linalg_transform__ = "tensors_distribute1"} ins(%arg0, %arg1: tensor, tensor) - init(%arg2: tensor) + outs(%arg2: tensor) -> tensor // CHECK: return %[[TD0]] : tensor diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir index 41adff7d46c3f..9e96880885680 100644 --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -8,7 +8,7 @@ func 
@matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { %t0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor) - init(%arg2: tensor) + outs(%arg2: tensor) -> tensor %c4 = constant 4 : index @@ -25,7 +25,7 @@ func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tens %6 = subtensor %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor %7 = subtensor %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32> %8 = subtensor %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor - %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) init(%8 : tensor) -> tensor + %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor) -> tensor %10 = subtensor_insert %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor scf.yield %10 : tensor } @@ -53,6 +53,6 @@ func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tens // subtensors of the producing matmul. // CHECK-DAG: %[[stB2:.*]] = subtensor %[[B]][0, %[[K]]] [%[[dA1]], 4] [1, 1] : tensor to tensor // CHECK-DAG: %[[stC:.*]] = subtensor %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor to tensor<2x4xf32> -// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) init(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32> -// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) init(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32> +// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32> +// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32> // CHECK-NEXT: subtensor_insert %[[stG]] into %[[RES]][%[[I]], %[[J]]] diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir index b899cb3e00495..787ea8d2b395b 100644 --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" | FileCheck %s // CHECK-LABEL: func @matmul_tensors( // CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor @@ -14,13 +14,13 @@ func @matmul_tensors( // CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor to tensor // CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor to tensor // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) -// CHECK-SAME: init(%[[sTC]] : tensor) -> tensor +// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor // CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor // CHECK: scf.yield %[[TD]] : tensor // CHECK: scf.yield %[[TD2]] : tensor // CHECK: scf.yield %[[TD1]] : tensor %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor) - init(%arg2: tensor) + outs(%arg2: tensor) -> tensor // CHECK: return %[[TD0]] : tensor diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index b713ae98b1073..db200ba5f90ff 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -1101,7 +1101,7 @@ TEST_FUNC(linalg_metadata_ops) { // CHECK-SAME: affine_map<(d0, d1, d2) -> (d0, d1)>], // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] // CHECK-SAME: ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor, memref) -// CHECK-SAME: init(%{{[a-z0-9]*}} : tensor) +// CHECK-SAME: outs(%{{[a-z0-9]*}} : tensor) // 
CHECK: mulf // CHECK: addf // CHECK: } -> tensor @@ -1115,14 +1115,15 @@ TEST_FUNC(linalg_tensors_test) { {ShapedType::kDynamicSize, ShapedType::kDynamicSize}, f32Type, {}, 0); auto tensorType = RankedTensorType::get( {ShapedType::kDynamicSize, ShapedType::kDynamicSize}, f32Type); - auto f = makeFunction("linalg_tensors", {}, {tensorType, memrefType}); + auto f = + makeFunction("linalg_tensors", {}, {tensorType, memrefType, tensorType}); OpBuilder builder(f.getBody()); ScopedContext scope(builder, f.getLoc()); - Value A(f.getArgument(0)), B(f.getArgument(1)); + Value A(f.getArgument(0)), B(f.getArgument(1)), C(f.getArgument(2)); AffineExpr i, j; bindDims(&globalContext(), i, j); - StructuredIndexed SA(A), SB(B), SC(tensorType); + StructuredIndexed SA(A), SB(B), SC(C); Value added = linalg_generic_pointwise_add(SA({i, j}), SB({i, j}), SC({i, j})) ->getResult(0); Value maxed = linalg_generic_pointwise_max( @@ -1223,7 +1224,8 @@ TEST_FUNC(builder_loop_for_yield) { [&](Value iv, ValueRange args) { Value sum = args[0] + args[1]; return scf::ValueVector{args[1], sum}; - }).getResults(); + }) + .getResults(); results[0] + results[1]; // clang-format off diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc index 528fae883d19e..f81380f02bb38 100644 --- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc +++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc @@ -4,7 +4,6 @@ // ODS-LABEL: def Test1Op : LinalgStructuredBase_Op<"test1", [ // ODS-NEXT: AttrSizedOperandSegments // ODS-NEXT: DeclareOpInterfaceMethods, -// ODS-NEXT: NamedStructuredOpTrait // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // // IMPL-LABEL: ArrayAttr Test1Op::iterator_types() { @@ -29,7 +28,6 @@ def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) { // ODS-LABEL: def Test2Op : LinalgStructuredBase_Op<"test2", [ // ODS-NEXT: AttrSizedOperandSegments // ODS-NEXT: DeclareOpInterfaceMethods, -// ODS-NEXT: NamedStructuredOpTrait // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // // IMPL-LABEL: ArrayAttr Test2Op::iterator_types() { @@ -54,7 +52,6 @@ def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) { // ODS-LABEL: def Test3Op : LinalgStructuredBase_Op<"test3", [ // ODS-NEXT: AttrSizedOperandSegments // ODS-NEXT: DeclareOpInterfaceMethods, -// ODS-NEXT: NamedStructuredOpTrait // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // // IMPL-LABEL: ArrayAttr Test3Op::iterator_types() { diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp index 45dc115e6c1e9..0342fab5ab9f0 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -1453,54 +1453,45 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, const char *header = R"FMT( def {0} : LinalgStructuredBase_Op<"{1}", [ AttrSizedOperandSegments, DeclareOpInterfaceMethods, - NamedStructuredOpTrait, SingleBlockImplicitTerminator<"YieldOp">]> { let arguments = (ins Variadic:$inputs, - Variadic:$output_buffers, - Variadic:$init_tensors); + Variadic:$outputs); let results = (outs Variadic:$result_tensors); let regions = (region AnyRegion:$region); let skipDefaultBuilders = 1; let builders = [ OpBuilderDAG< - (ins "ValueRange":$inputs, "ValueRange":$outputBuffers), + (ins "ValueRange":$inputs, "ValueRange":$outputs), [{{ $_state.addOperands(inputs); - $_state.addOperands(outputBuffers); + $_state.addOperands(outputs); $_state.addAttribute( 
"operand_segment_sizes", $_builder.getI32VectorAttr({{ static_cast(inputs.size()), - static_cast(outputBuffers.size()), - static_cast(0)})); + static_cast(outputs.size())})); buildNamedStructuredOpRegionAndAttributes<{0}>( $_builder, $_state, TypeRange(inputs), - TypeRange(outputBuffers), - TypeRange(), - TypeRange()); + TypeRange(outputs)); }]>, OpBuilderDAG< (ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs, - "ValueRange":$outputBuffers, "ValueRange":$initTensors), + "ValueRange":$outputs), [{{ $_state.addOperands(inputs); - $_state.addOperands(outputBuffers); - $_state.addOperands(initTensors); + $_state.addOperands(outputs); $_state.addTypes(resultTensorTypes); $_state.addAttribute( "operand_segment_sizes", $_builder.getI32VectorAttr({{ static_cast(inputs.size()), - static_cast(outputBuffers.size()), - static_cast(initTensors.size())})); + static_cast(outputs.size())})); buildNamedStructuredOpRegionAndAttributes<{0}>( $_builder, $_state, TypeRange(inputs), - TypeRange(outputBuffers), - TypeRange(initTensors), - resultTensorTypes); + TypeRange(outputs)); }]>, OpBuilderDAG< (ins "TypeRange":$resultTensorTypes, "ValueRange":$operands, CArg<"ArrayRef", "{{}">:$attributes), @@ -1513,7 +1504,6 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, ]; let printer = [{{ return ::printNamedStructuredOp(p, *this); }]; let parser = [{{ return ::parseNamedStructuredOp<{0}>(parser, result); }]; - let verifier = [{{ return ::verifyNamedStructuredOp(*this); }]; let hasFolder = 1; let hasCanonicalizer = 1; From bd2e83333ece6afa8a6c1975d19d403d63349414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Mon, 21 Dec 2020 22:26:33 +0100 Subject: [PATCH 036/378] [lldb] [Process/FreeBSDRemote] Remove anonymous namespace --- .../FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp index b3b4a6cb05784..d5052e7d1b3af 100644 --- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp +++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp @@ -29,9 +29,6 @@ using namespace lldb_private; using namespace lldb_private::process_freebsd; -// Private namespace. - -namespace { // x86 64-bit general purpose registers. static const uint32_t g_gpr_regnums_x86_64[] = { lldb_rax_x86_64, lldb_rbx_x86_64, lldb_rcx_x86_64, lldb_rdx_x86_64, @@ -138,7 +135,7 @@ static_assert((sizeof(g_dbr_regnums_x86_64) / sizeof(g_dbr_regnums_x86_64[0])) - "g_dbr_regnums_x86_64 has wrong number of register infos"); // x86 32-bit general purpose registers. -const uint32_t g_gpr_regnums_i386[] = { +static const uint32_t g_gpr_regnums_i386[] = { lldb_eax_i386, lldb_ebx_i386, lldb_ecx_i386, lldb_edx_i386, lldb_edi_i386, lldb_esi_i386, lldb_ebp_i386, lldb_esp_i386, lldb_eip_i386, lldb_eflags_i386, lldb_cs_i386, lldb_fs_i386, @@ -155,7 +152,7 @@ static_assert((sizeof(g_gpr_regnums_i386) / sizeof(g_gpr_regnums_i386[0])) - "g_gpr_regnums_i386 has wrong number of register infos"); // x86 32-bit floating point registers. 
-const uint32_t g_fpu_regnums_i386[] = { +static const uint32_t g_fpu_regnums_i386[] = { lldb_fctrl_i386, lldb_fstat_i386, lldb_ftag_i386, lldb_fop_i386, lldb_fiseg_i386, lldb_fioff_i386, lldb_foseg_i386, lldb_fooff_i386, lldb_mxcsr_i386, lldb_mxcsrmask_i386, lldb_st0_i386, lldb_st1_i386, @@ -236,7 +233,6 @@ static const RegisterSet g_reg_sets_x86_64[k_num_register_sets] = { }; #define REG_CONTEXT_SIZE (GetRegisterInfoInterface().GetGPRSize()) -} // namespace NativeRegisterContextFreeBSD * NativeRegisterContextFreeBSD::CreateHostNativeRegisterContextFreeBSD( From 9d2529a38b34d06dbe17020b98db1ee21d9a628c Mon Sep 17 00:00:00 2001 From: ergawy Date: Mon, 21 Dec 2020 09:40:40 +0100 Subject: [PATCH 037/378] [MLIR][Docs] Fix a small typo in documentation. Just fixes a tiny typo in a link between 2 pages. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D93616 --- mlir/docs/PatternRewriter.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/docs/PatternRewriter.md b/mlir/docs/PatternRewriter.md index ab93245395a34..fba5a9b8eac19 100644 --- a/mlir/docs/PatternRewriter.md +++ b/mlir/docs/PatternRewriter.md @@ -238,7 +238,7 @@ between, and within dialects using a concept of "legality". This framework allows for transforming illegal operations to those supported by a provided conversion target, via a set of pattern-based operation rewriting patterns. This framework also provides support for type conversions. More information on this -driver can be found [here](DialectConversion.nd). +driver can be found [here](DialectConversion.md). ### Greedy Pattern Rewrite Driver From 7c7b55b985136a975223a9cefccd8fa1a5df7765 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Mon, 21 Dec 2020 13:17:17 -0800 Subject: [PATCH 038/378] [mlir][vector] Extend vector unroll to all element-wise ops Extend unroll to support all element-wise ops and allow unrolling for ops with vector operands with the same shape as the destination but different element type (like Cmp or Select). Differential Revision: https://reviews.llvm.org/D93121 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 20 ++++++++--- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 10 +++--- .../Dialect/Vector/vector-transforms.mlir | 36 ++++++++++++++++++- .../lib/Transforms/TestVectorTransforms.cpp | 5 ++- 4 files changed, 60 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 7af44f8435ffa..ba78db68214f4 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -69,7 +69,9 @@ class CastOp traits = []> : // Base class for arithmetic cast operations. class ArithmeticCastOp traits = []> : - CastOp { + CastOp])> { } // Base class for unary ops. Requires single operand and result. 
Individual @@ -104,6 +106,7 @@ class ArithmeticOp traits = []> : Op, ElementwiseMappable])> { let results = (outs AnyType:$result); @@ -992,6 +995,7 @@ def CmpFPredicateAttr : I64EnumAttr< def CmpFOp : Std_Op<"cmpf", [NoSideEffect, SameTypeOperands, ElementwiseMappable, + DeclareOpInterfaceMethods, TypesMatchWith< "result type has i1 element type and same shape as operands", "lhs", "result", "getI1SameShape($_self)">]> { @@ -1076,6 +1080,7 @@ def CmpIPredicateAttr : I64EnumAttr< def CmpIOp : Std_Op<"cmpi", [NoSideEffect, SameTypeOperands, ElementwiseMappable, + DeclareOpInterfaceMethods, TypesMatchWith< "result type has i1 element type and same shape as operands", "lhs", "result", "getI1SameShape($_self)">]> { @@ -2548,7 +2553,7 @@ def RsqrtOp : FloatUnaryOp<"rsqrt"> { def SelectOp : Std_Op<"select", [NoSideEffect, AllTypesMatch<["true_value", "false_value", "result"]>, - ElementwiseMappable]> { + ElementwiseMappable, DeclareOpInterfaceMethods]> { let summary = "select operation"; let description = [{ The `select` operation chooses one value based on a binary condition @@ -2779,7 +2784,8 @@ def SignedShiftRightOp : IntArithmeticOp<"shift_right_signed"> { //===----------------------------------------------------------------------===// def SignExtendIOp : Std_Op<"sexti", - [NoSideEffect, ElementwiseMappable]> { + [NoSideEffect, ElementwiseMappable, + DeclareOpInterfaceMethods]> { let summary = "integer sign extension operation"; let description = [{ The integer sign extension operation takes an integer input of @@ -3595,7 +3601,9 @@ def TransposeOp : Std_Op<"transpose", [NoSideEffect]>, // TruncateIOp //===----------------------------------------------------------------------===// -def TruncateIOp : Std_Op<"trunci", [NoSideEffect, ElementwiseMappable]> { +def TruncateIOp : Std_Op<"trunci", + [NoSideEffect, ElementwiseMappable, + DeclareOpInterfaceMethods,]> { let summary = "integer truncation operation"; let description = [{ The integer truncation operation takes an integer input of @@ -3862,7 +3870,9 @@ def XOrOp : IntArithmeticOp<"xor", [Commutative]> { // ZeroExtendIOp //===----------------------------------------------------------------------===// -def ZeroExtendIOp : Std_Op<"zexti", [NoSideEffect, ElementwiseMappable]> { +def ZeroExtendIOp : Std_Op<"zexti", + [NoSideEffect, ElementwiseMappable, + DeclareOpInterfaceMethods,]> { let summary = "integer zero extension operation"; let description = [{ The integer zero extension operation takes an integer input of diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 1e58a759d305a..5ba82b39a5a65 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -492,8 +492,9 @@ static void getVectorElementwiseOpUnrollState(Operation *op, assert(resultType && "Expected op with vector result type"); auto resultShape = resultType.getShape(); // Verify that all operands have the same vector type as result. - assert(llvm::all_of(op->getOperandTypes(), - [=](Type type) { return type == resultType; })); + assert(llvm::all_of(op->getOperandTypes(), [=](Type type) { + return type.cast().getShape() == resultShape; + })); // Create trivial elementwise identity index map based on 'resultShape'. DenseMap indexMap; @@ -504,8 +505,9 @@ static void getVectorElementwiseOpUnrollState(Operation *op, // Create VectorState each operand and single result. 
unsigned numVectors = op->getNumOperands() + op->getNumResults(); vectors.resize(numVectors); - for (unsigned i = 0; i < op->getNumOperands(); ++i) - vectors[i] = {resultType, indexMap, i, false}; + for (auto it : llvm::enumerate(op->getOperandTypes())) + vectors[it.index()] = {it.value().cast(), indexMap, + static_cast(it.index()), false}; vectors[numVectors - 1] = {resultType, indexMap, -1, false}; resultIndex = numVectors - 1; } diff --git a/mlir/test/Dialect/Vector/vector-transforms.mlir b/mlir/test/Dialect/Vector/vector-transforms.mlir index 167314d36458e..43a83f04dd304 100644 --- a/mlir/test/Dialect/Vector/vector-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-transforms.mlir @@ -1,5 +1,4 @@ // RUN: mlir-opt %s -test-vector-to-vector-conversion | FileCheck %s -// RUN: mlir-opt %s -test-vector-unrolling-patterns | FileCheck %s // CHECK-DAG: #[[MAP1:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d1, d2)> @@ -514,3 +513,38 @@ func @shape_cast_fold(%arg0 : vector<5x4x2xf32>, %arg1 : vector<3x4x2xf32>) return %6, %7 : vector<5x4x2xf32>, vector<3x4x2xf32> } + +// CHECK-LABEL: func @elementwise_unroll +// CHECK-SAME: (%[[ARG0:.*]]: memref<4x4xf32>, %[[ARG1:.*]]: memref<4x4xf32>) +// CHECK: %[[C0:.*]] = constant 0 : index +// CHECK: %[[C2:.*]] = constant 2 : index +// CHECK: %[[VT0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[VT1:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[VT2:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[VT3:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[VT4:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[VT5:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[VT6:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[VT7:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK: %[[CMP0:.*]] = cmpf "ult", %[[VT0]], %[[VT4]] : vector<2x2xf32> +// CHECK: %[[CMP1:.*]] = cmpf "ult", %[[VT1]], %[[VT5]] : vector<2x2xf32> +// CHECK: %[[CMP2:.*]] = cmpf "ult", %[[VT2]], %[[VT6]] : vector<2x2xf32> +// CHECK: %[[CMP3:.*]] = cmpf "ult", %[[VT3]], %[[VT7]] : vector<2x2xf32> +// CHECK: %[[SEL0:.*]] = select %[[CMP0]], %[[VT0]], %[[VT4]] : vector<2x2xi1>, vector<2x2xf32> +// CHECK: %[[SEL1:.*]] = select %[[CMP1]], %[[VT1]], %[[VT5]] : vector<2x2xi1>, vector<2x2xf32> +// CHECK: %[[SEL2:.*]] = select %[[CMP2]], %[[VT2]], %[[VT6]] : vector<2x2xi1>, vector<2x2xf32> +// CHECK: %[[SEL3:.*]] = select %[[CMP3]], %[[VT3]], %[[VT7]] : vector<2x2xi1>, vector<2x2xf32> +// CHECK: vector.transfer_write %[[SEL0]], %[[ARG0]][%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32> +// CHECK: vector.transfer_write %[[SEL1]], %[[ARG0]][%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32> +// CHECK: vector.transfer_write %[[SEL2]], %[[ARG0]][%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32> +// CHECK: vector.transfer_write %[[SEL3]], %[[ARG0]][%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32> +func @elementwise_unroll(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>) { + %c0 = constant 0 : index + %cf0 = constant 0.0 : f32 + %0 = vector.transfer_read %arg0[%c0, %c0], %cf0 : 
memref<4x4xf32>, vector<4x4xf32> + %1 = vector.transfer_read %arg1[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32> + %cond = cmpf "ult", %0, %1 : vector<4x4xf32> + %2 = select %cond, %0, %1 : vector<4x4xi1>, vector<4x4xf32> + vector.transfer_write %2, %arg0[%c0, %c0] : vector<4x4xf32>, memref<4x4xf32> + return +} diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp index 99c336ef05653..f219ef04fce56 100644 --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -37,8 +37,11 @@ struct TestVectorToVectorConversion private: // Return the target shape based on op type. static Optional> getShape(Operation *op) { - if (isa(op)) + if (isa(op)) return SmallVector(2, 2); + if (auto transferOp = dyn_cast(op)) { + return SmallVector(transferOp.getVectorType().getRank(), 2); + } if (isa(op)) return SmallVector(3, 2); return llvm::None; From ffba47df76460905965df4b54cf6ba945d2eb1ce Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Mon, 21 Dec 2020 17:38:56 +0000 Subject: [PATCH 039/378] Revert "[AMDGPU][HIP] Switch default DWARF version to 5" This reverts commit c4d10e7e9bb47b77fad43d8ddcfa328298f36c88. Differential Revision: https://reviews.llvm.org/D93648 --- clang/lib/Driver/ToolChains/AMDGPU.h | 2 +- clang/lib/Driver/ToolChains/HIP.h | 2 +- clang/test/Driver/amdgpu-toolchain.c | 2 +- clang/test/Driver/hip-toolchain-dwarf.hip | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index f5448b76aee54..55ef6e01967ed 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -60,7 +60,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { public: AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); - unsigned GetDefaultDwarfVersion() const override { return 5; } + unsigned GetDefaultDwarfVersion() const override { return 4; } bool IsIntegratedAssemblerDefault() const override { return true; } bool IsMathErrnoDefault() const override { return false; } diff --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h index ff58c5451b0b8..5e2be7138579a 100644 --- a/clang/lib/Driver/ToolChains/HIP.h +++ b/clang/lib/Driver/ToolChains/HIP.h @@ -99,7 +99,7 @@ class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain { computeMSVCVersion(const Driver *D, const llvm::opt::ArgList &Args) const override; - unsigned GetDefaultDwarfVersion() const override { return 5; } + unsigned GetDefaultDwarfVersion() const override { return 4; } const ToolChain &HostTC; diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c index cb92744eee6a3..ac558e0e26eb3 100644 --- a/clang/test/Driver/amdgpu-toolchain.c +++ b/clang/test/Driver/amdgpu-toolchain.c @@ -8,7 +8,7 @@ // AS_LINK: clang{{.*}} "-cc1as" // AS_LINK: ld.lld{{.*}} "-shared" -// DWARF_VER: "-dwarf-version=5" +// DWARF_VER: "-dwarf-version=4" // RUN: %clang -### -target amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \ // RUN: -flto %s 2>&1 | FileCheck -check-prefix=LTO %s diff --git a/clang/test/Driver/hip-toolchain-dwarf.hip b/clang/test/Driver/hip-toolchain-dwarf.hip index c853d5cf07cf5..44d66fe52e047 100644 --- a/clang/test/Driver/hip-toolchain-dwarf.hip +++ b/clang/test/Driver/hip-toolchain-dwarf.hip @@ -6,4 +6,4 @@ // RUN: -x hip --cuda-gpu-arch=gfx803 %s \ // RUN: -Xarch_gfx803 -g 2>&1 | FileCheck %s 
-check-prefix=DWARF_VER -// DWARF_VER: "-dwarf-version=5" +// DWARF_VER: "-dwarf-version=4" From 76f4f42ebaf9146da3603943bea7c52ca58ae692 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Dec 2020 18:06:10 -0800 Subject: [PATCH 040/378] [NewPM] Add TargetMachine method to add alias analyses AMDGPUTargetMachine::adjustPassManager() adds some alias analyses to the legacy PM. We need a way to do the same for the new PM in order to port AMDGPUTargetMachine::adjustPassManager() to the new PM. Currently the new PM adds alias analyses by creating an AAManager via PassBuilder and overriding the AAManager a PassManager uses via FunctionAnalysisManager::registerPass(). We will continue to respect a custom AA pipeline that specifies an exact AA pipeline to use, but for "default" we will now add alias analyses that backends specify. Most uses of PassManager use the "default" AAManager created by PassBuilder::buildDefaultAAPipeline(). Backends can override the newly added TargetMachine::registerAliasAnalyses() to add custom alias analyses. Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D93261 --- llvm/include/llvm/Passes/PassBuilder.h | 3 +++ llvm/include/llvm/Target/TargetMachine.h | 5 +++++ llvm/lib/Passes/PassBuilder.cpp | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index e2d22031dd5ec..5a13df5b0c860 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -460,6 +460,9 @@ class PassBuilder { /// Build the default `AAManager` with the default alias analysis pipeline /// registered. + /// + /// This also adds target-specific alias analyses registered via + /// TargetMachine::registerAliasAnalyses(). AAManager buildDefaultAAPipeline(); /// Parse a textual pass pipeline description into a \c diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index d4fc2d8f0887d..55b35d9c0d075 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -23,6 +23,7 @@ namespace llvm { +class AAManager; class Function; class GlobalValue; class MachineModuleInfoWrapperPass; @@ -322,6 +323,10 @@ class TargetMachine { virtual void registerPassBuilderCallbacks(PassBuilder &, bool DebugPassManager) {} + /// Allow the target to register alias analyses with the AAManager for use + /// with the new pass manager. Only affects the "default" AAManager. + virtual void registerAliasAnalyses(AAManager &) {} + /// Add passes to the specified pass manager to get the specified file /// emitted. Typically this will involve several steps of code generation. /// This method should return true if emission of this file type is not diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4e8062c6a7890..635e7bab1a7af 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1882,6 +1882,10 @@ AAManager PassBuilder::buildDefaultAAPipeline() { // results from `GlobalsAA` through a readonly proxy. AA.registerModuleAnalysis(); + // Add target-specific alias analyses. + if (TM) + TM->registerAliasAnalyses(AA); + return AA; } From d33abc337c74d03d4e49b8d81a2dba7f23594a1a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 21 Dec 2020 13:46:45 -0800 Subject: [PATCH 041/378] Migrate MCContext::createTempSymbol call sites to AlwaysAddSuffix=true Most call sites set AlwaysAddSuffix to true. 
The two use cases do not really need false and can be more consistent with other temporary symbol usage. --- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 2 +- .../lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 2 +- llvm/test/CodeGen/PowerPC/p10-spill-crun.ll | 4 +- .../PowerPC/pcrel-call-linkage-with-calls.ll | 20 ++--- .../CodeGen/PowerPC/pcrel-got-indirect.ll | 80 +++++++++---------- .../CodeGen/PowerPC/pcrel-linkeropt-option.ll | 8 +- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll | 48 +++++------ .../PowerPC/pcrel-relocation-plus-offset.ll | 8 +- llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll | 4 +- llvm/test/CodeGen/PowerPC/pcrel.ll | 4 +- 10 files changed, 90 insertions(+), 90 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index dd22e5dfe6e1b..4ab2ff12d3196 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1897,7 +1897,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { // LSJLJEH: Register SrcReg = MI->getOperand(0).getReg(); Register ValReg = MI->getOperand(1).getReg(); - MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true); + MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", true, true); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ValReg) diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index 1ae73bc8de43f..a39489d353a26 100644 --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -339,7 +339,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) { // Create the symbol. MCContext &Context = MF->getContext(); MCSymbol *Symbol = - Context.createTempSymbol(Twine("pcrel"), false, false); + Context.createTempSymbol(Twine("pcrel"), true, false); MachineOperand PCRelLabel = MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); Pair->DefInst->addOperand(*MF, PCRelLabel); diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll index 04f63a302c345..12a8a76c85e64 100644 --- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll @@ -105,8 +105,8 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn ; CHECK-NEXT: paddi r4, 0, global_4@PCREL, 1 ; CHECK-NEXT: stw r3, 176(r1) ; CHECK-NEXT: pld r3, global_3@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel: -; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; CHECK-NEXT: .Lpcrel0: +; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctrl diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll index 8fa86ef50ea57..1eb48991db708 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -67,8 +67,8 @@ define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) l ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: bl localCall@notoc ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel: -; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; CHECK-S-NEXT: .Lpcrel0: +; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -152,8 
+152,8 @@ define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b) ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: bl externCall@notoc ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel0: -; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; CHECK-S-NEXT: .Lpcrel1: +; CHECK-S-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -216,8 +216,8 @@ define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr ; CHECK-S: .localentry TailCallLocal2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel1: -; CHECK-S-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; CHECK-S-NEXT: .Lpcrel2: +; CHECK-S-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -260,8 +260,8 @@ define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr ; CHECK-S: .localentry TailCallExtern2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel2: -; CHECK-S-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; CHECK-S-NEXT: .Lpcrel3: +; CHECK-S-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -327,8 +327,8 @@ define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) loca ; CHECK-S-NEXT: mtctr r12 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel3: -; CHECK-S-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; CHECK-S-NEXT: .Lpcrel4: +; CHECK-S-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll index 4d61b66d3bb77..7ade41ced3538 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -23,16 +23,16 @@ define dso_local signext i32 @ReadGlobalVarChar() local_unnamed_addr { ; LE-LABEL: ReadGlobalVarChar: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valChar@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel: -; LE-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; LE-NEXT: .Lpcrel0: +; LE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; LE-NEXT: lbz r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarChar: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valChar@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel: -; BE-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; BE-NEXT: .Lpcrel0: +; BE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; BE-NEXT: lbz r3, 0(r3) ; BE-NEXT: blr entry: @@ -64,16 +64,16 @@ define dso_local signext i32 @ReadGlobalVarShort() local_unnamed_addr { ; LE-LABEL: ReadGlobalVarShort: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valShort@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel0: -; LE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; LE-NEXT: .Lpcrel1: +; LE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; LE-NEXT: lha r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarShort: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valShort@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel0: -; BE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; BE-NEXT: 
.Lpcrel1: +; BE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; BE-NEXT: lha r3, 0(r3) ; BE-NEXT: blr entry: @@ -105,16 +105,16 @@ define dso_local signext i32 @ReadGlobalVarInt() local_unnamed_addr { ; LE-LABEL: ReadGlobalVarInt: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valInt@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel1: -; LE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; LE-NEXT: .Lpcrel2: +; LE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarInt: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valInt@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel1: -; BE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; BE-NEXT: .Lpcrel2: +; BE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; BE-NEXT: lwa r3, 0(r3) ; BE-NEXT: blr entry: @@ -145,16 +145,16 @@ define dso_local signext i32 @ReadGlobalVarUnsigned() local_unnamed_addr { ; LE-LABEL: ReadGlobalVarUnsigned: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel2: -; LE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; LE-NEXT: .Lpcrel3: +; LE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarUnsigned: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel2: -; BE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; BE-NEXT: .Lpcrel3: +; BE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; BE-NEXT: lwa r3, 0(r3) ; BE-NEXT: blr entry: @@ -185,16 +185,16 @@ define dso_local signext i32 @ReadGlobalVarLong() local_unnamed_addr { ; LE-LABEL: ReadGlobalVarLong: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valLong@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel3: -; LE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; LE-NEXT: .Lpcrel4: +; LE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarLong: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valLong@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel3: -; BE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; BE-NEXT: .Lpcrel4: +; BE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; BE-NEXT: lwa r3, 4(r3) ; BE-NEXT: blr entry: @@ -226,16 +226,16 @@ define dso_local i32* @ReadGlobalPtr() local_unnamed_addr { ; LE-LABEL: ReadGlobalPtr: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, ptr@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel4: -; LE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; LE-NEXT: .Lpcrel5: +; LE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; LE-NEXT: ld r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalPtr: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, ptr@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel4: -; BE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; BE-NEXT: .Lpcrel5: +; BE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; BE-NEXT: ld r3, 0(r3) ; BE-NEXT: blr entry: @@ -247,9 +247,9 @@ define dso_local void @WriteGlobalPtr() local_unnamed_addr { ; LE-LABEL: WriteGlobalPtr: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, ptr@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel5: +; LE-NEXT: .Lpcrel6: ; LE-NEXT: li r4, 3 -; LE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; LE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; LE-NEXT: ld r3, 0(r3) ; LE-NEXT: stw r4, 0(r3) ; LE-NEXT: blr @@ -257,9 +257,9 @@ define dso_local void @WriteGlobalPtr() local_unnamed_addr { ; BE-LABEL: WriteGlobalPtr: ; BE: # %bb.0: # %entry ; BE-NEXT: pld 
r3, ptr@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel5: +; BE-NEXT: .Lpcrel6: ; BE-NEXT: li r4, 3 -; BE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; BE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; BE-NEXT: ld r3, 0(r3) ; BE-NEXT: stw r4, 0(r3) ; BE-NEXT: blr @@ -287,16 +287,16 @@ define dso_local signext i32 @ReadGlobalArray() local_unnamed_addr { ; LE-LABEL: ReadGlobalArray: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, array@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel6: -; LE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) +; LE-NEXT: .Lpcrel7: +; LE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; LE-NEXT: lwa r3, 12(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalArray: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, array@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel6: -; BE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) +; BE-NEXT: .Lpcrel7: +; BE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; BE-NEXT: lwa r3, 12(r3) ; BE-NEXT: blr entry: @@ -327,16 +327,16 @@ define dso_local signext i32 @ReadGlobalStruct() local_unnamed_addr { ; LE-LABEL: ReadGlobalStruct: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, structure@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel7: -; LE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; LE-NEXT: .Lpcrel8: +; LE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; LE-NEXT: lwa r3, 4(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalStruct: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, structure@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel7: -; BE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; BE-NEXT: .Lpcrel8: +; BE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; BE-NEXT: lwa r3, 4(r3) ; BE-NEXT: blr entry: @@ -368,8 +368,8 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr { ; LE: .localentry ReadFuncPtr, 1 ; LE-NEXT: # %bb.0: # %entry ; LE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel8: -; LE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; LE-NEXT: .Lpcrel9: +; LE-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) ; LE-NEXT: ld r12, 0(r3) ; LE-NEXT: mtctr r12 ; LE-NEXT: bctr @@ -379,8 +379,8 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr { ; BE: .localentry ReadFuncPtr, 1 ; BE-NEXT: # %bb.0: # %entry ; BE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel8: -; BE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; BE-NEXT: .Lpcrel9: +; BE-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) ; BE-NEXT: ld r12, 0(r3) ; BE-NEXT: mtctr r12 ; BE-NEXT: bctr diff --git a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt-option.ll b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt-option.ll index 8e470b6f8ccba..564bd29f181a3 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt-option.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt-option.ll @@ -15,16 +15,16 @@ define dso_local i8 @Read8() local_unnamed_addr { ; DEFAULT-LABEL: Read8: ; DEFAULT: # %bb.0: # %entry ; DEFAULT-NEXT: pld r3, input8@got@pcrel(0), 1 -; DEFAULT-NEXT: .Lpcrel: -; DEFAULT-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; DEFAULT-NEXT: .Lpcrel0: +; DEFAULT-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; DEFAULT-NEXT: lbz r3, 0(r3) ; DEFAULT-NEXT: blr ; ; ON-LABEL: Read8: ; ON: # %bb.0: # %entry ; ON-NEXT: pld r3, input8@got@pcrel(0), 1 -; ON-NEXT: .Lpcrel: -; ON-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; ON-NEXT: .Lpcrel0: +; ON-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; ON-NEXT: lbz r3, 0(r3) ; ON-NEXT: blr ; diff --git 
a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll index e878e74399115..604d57aa7f853 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll @@ -38,9 +38,9 @@ define dso_local void @ReadWrite8() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWrite8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input8@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel: +; CHECK-NEXT: .Lpcrel0: ; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-NEXT: lbz r3, 0(r3) ; In this test the stb r3, 0(r4) cannot be optimized because it ; uses the register r3 and that register is defined by lbz r3, 0(r3) @@ -57,9 +57,9 @@ define dso_local void @ReadWrite16() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWrite16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input16@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel0: +; CHECK-NEXT: .Lpcrel1: ; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-NEXT: lhz r3, 0(r3) ; In this test the sth r3, 0(r4) cannot be optimized because it ; uses the register r3 and that register is defined by lhz r3, 0(r3) @@ -76,9 +76,9 @@ define dso_local void @ReadWrite32() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWrite32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel1: +; CHECK-NEXT: .Lpcrel2: ; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; CHECK-NEXT: lwz r3, 0(r3) ; CHECK-NEXT: stw r3, 0(r4) ; CHECK-NEXT: blr @@ -92,9 +92,9 @@ define dso_local void @ReadWrite64() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWrite64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input64@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel2: +; CHECK-NEXT: .Lpcrel3: ; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: blr @@ -124,9 +124,9 @@ define dso_local void @ReadWritef32() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWritef32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, inputf32@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel3: +; CHECK-NEXT: .Lpcrel4: ; CHECK-NEXT: xxspltidp vs1, 1078103900 -; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; CHECK-NEXT: lfs f0, 0(r3) ; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1 ; CHECK-NEXT: xsaddsp f0, f0, f1 @@ -143,9 +143,9 @@ define dso_local void @ReadWritef64() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWritef64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, inputf64@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel4: +; CHECK-NEXT: .Lpcrel5: ; CHECK-NEXT: plfd f1, .LCPI6_0@PCREL(0), 1 -; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; CHECK-NEXT: lfd f0, 0(r3) ; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1 ; CHECK-NEXT: xsadddp f0, f0, f1 @@ -196,9 +196,9 @@ define dso_local void @ReadWriteArray() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWriteArray: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, 
ArrayIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel5: +; CHECK-NEXT: .Lpcrel6: ; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; CHECK-NEXT: lwz r3, 28(r3) ; CHECK-NEXT: addi r3, r3, 42 ; CHECK-NEXT: stw r3, 8(r4) @@ -229,12 +229,12 @@ define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWriteIntPtr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, IntPtrIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel6: -; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1 ; CHECK-NEXT: .Lpcrel7: -; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) -; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel8: ; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; CHECK-NEXT: ld r4, 0(r4) ; CHECK-NEXT: lwz r5, 216(r3) ; CHECK-NEXT: lwz r3, 48(r3) @@ -258,9 +258,9 @@ define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 { ; CHECK-LABEL: ReadWriteFuncPtr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel8: +; CHECK-NEXT: .Lpcrel9: ; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) ; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: blr @@ -289,8 +289,8 @@ define dso_local void @FuncPtrCall() local_unnamed_addr #0 { ; CHECK: .localentry FuncPtrCall, 1 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel9: -; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) +; CHECK-NEXT: .Lpcrel10: +; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctr @@ -305,8 +305,8 @@ define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 { ; CHECK-LABEL: ReadVecElement: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel10: -; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) +; CHECK-NEXT: .Lpcrel11: +; CHECK-NEXT: .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8) ; CHECK-NEXT: lwa r3, 4(r3) ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll index 44d3f7a50e9bf..98ea41f7bb26f 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll @@ -50,8 +50,8 @@ define dso_local signext i32 @getElementExtern4() local_unnamed_addr { ; CHECK-S-LABEL: getElementExtern4: ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel: -; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; CHECK-S-NEXT: .Lpcrel0: +; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-S-NEXT: lwa r3, 16(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: : @@ -69,8 +69,8 @@ define dso_local signext i32 @getElementExternNegative() local_unnamed_addr { ; CHECK-S-LABEL: getElementExternNegative: ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel0: -; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; CHECK-S-NEXT: .Lpcrel1: +; CHECK-S-NEXT: .reloc 
.Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-S-NEXT: lwa r3, -4(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: : diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll index 1340197b3ccba..583e7950b6d53 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll @@ -51,8 +51,8 @@ define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr { ; CHECK: .localentry TailCallExtrnFuncPtr, 1 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel: -; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; CHECK-NEXT: .Lpcrel0: +; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctr diff --git a/llvm/test/CodeGen/PowerPC/pcrel.ll b/llvm/test/CodeGen/PowerPC/pcrel.ll index 55783180cfaca..1d3d96a92904f 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel.ll @@ -41,8 +41,8 @@ define dso_local signext i32 @ReadGlobalVarInt() local_unnamed_addr { ; CHECK-S-LABEL: ReadGlobalVarInt ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, valIntGlob@got@pcrel(0), 1 -; CHECK-S-NEXT: .Lpcrel: -; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; CHECK-S-NEXT: .Lpcrel0: +; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-S-NEXT: lwa r3, 0(r3) ; CHECK-S-NEXT: blr From d9a0c40bce5f0b1325b89c36785d82fa146547aa Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 21 Dec 2020 14:04:13 -0800 Subject: [PATCH 042/378] [MC] Split MCContext::createTempSymbol, default AlwaysAddSuffix to true, and add comments CanBeUnnamed is rarely false. Splitting to a createNamedTempSymbol makes the intention clearer and matches the direction of reverted r240130 (to drop the unneeded parameters). No behavior change. --- llvm/include/llvm/MC/MCContext.h | 16 ++++++++++------ llvm/lib/CodeGen/MachineModuleInfo.cpp | 3 ++- llvm/lib/MC/MCContext.cpp | 19 +++++++++++++------ llvm/lib/MC/MCDwarf.cpp | 10 ++++------ llvm/lib/MC/MCObjectStreamer.cpp | 2 +- llvm/lib/MC/MCSection.cpp | 2 +- llvm/lib/MC/MCStreamer.cpp | 2 +- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 2 +- .../lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 3 +-- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 3 +-- 10 files changed, 35 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 75e6dbe069e22..49ab0ce8d6fd0 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -397,12 +397,16 @@ namespace llvm { /// unspecified name. MCSymbol *createLinkerPrivateTempSymbol(); - /// Create and return a new assembler temporary symbol with a unique but - /// unspecified name. - MCSymbol *createTempSymbol(bool CanBeUnnamed = true); - - MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix, - bool CanBeUnnamed = true); + /// Create a temporary symbol with a unique name. The name will be omitted + /// in the symbol table if UseNamesOnTempLabels is false (default except + /// MCAsmStreamer). The overload without Name uses an unspecified name. + MCSymbol *createTempSymbol(); + MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix = true); + + /// Create a temporary symbol with a unique name whose name cannot be + /// omitted in the symbol table. This is rarely used. 
+ MCSymbol *createNamedTempSymbol(); + MCSymbol *createNamedTempSymbol(const Twine &Name); /// Create the definition of a directional local symbol for numbered label /// (used for "1:" definitions). diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index f75acbb2494b5..5c2e2fb16b691 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -104,7 +104,8 @@ ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken()); + MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol() + : Context.createTempSymbol(); Entry.Symbols.push_back(Sym); return Entry.Symbols; } diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index d054c93e37f48..9dab8a6c09101 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -232,11 +232,16 @@ MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix, llvm_unreachable("Infinite loop"); } -MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix, - bool CanBeUnnamed) { +MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) { SmallString<128> NameSV; raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name; - return createSymbol(NameSV, AlwaysAddSuffix, CanBeUnnamed); + return createSymbol(NameSV, AlwaysAddSuffix, true); +} + +MCSymbol *MCContext::createNamedTempSymbol(const Twine &Name) { + SmallString<128> NameSV; + raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name; + return createSymbol(NameSV, true, false); } MCSymbol *MCContext::createLinkerPrivateTempSymbol() { @@ -245,8 +250,10 @@ MCSymbol *MCContext::createLinkerPrivateTempSymbol() { return createSymbol(NameSV, true, false); } -MCSymbol *MCContext::createTempSymbol(bool CanBeUnnamed) { - return createTempSymbol("tmp", true, CanBeUnnamed); +MCSymbol *MCContext::createTempSymbol() { return createTempSymbol("tmp"); } + +MCSymbol *MCContext::createNamedTempSymbol() { + return createNamedTempSymbol("tmp"); } unsigned MCContext::NextInstance(unsigned LocalLabelVal) { @@ -267,7 +274,7 @@ MCSymbol *MCContext::getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal, unsigned Instance) { MCSymbol *&Sym = LocalSymbols[std::make_pair(LocalLabelVal, Instance)]; if (!Sym) - Sym = createTempSymbol(false); + Sym = createNamedTempSymbol(); return Sym; } diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 7f72d062b7ac6..12a7d9b6e5899 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -46,10 +46,8 @@ using namespace llvm; MCSymbol *mcdwarf::emitListsTableHeaderStart(MCStreamer &S) { - MCSymbol *Start = - S.getContext().createTempSymbol("debug_list_header_start", true, true); - MCSymbol *End = - S.getContext().createTempSymbol("debug_list_header_end", true, true); + MCSymbol *Start = S.getContext().createTempSymbol("debug_list_header_start"); + MCSymbol *End = S.getContext().createTempSymbol("debug_list_header_end"); auto DwarfFormat = S.getContext().getDwarfFormat(); if (DwarfFormat == dwarf::DWARF64) { S.AddComment("DWARF64 mark"); @@ -1140,7 +1138,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) { MCSymbol *EndSymbol = mcdwarf::emitListsTableHeaderStart(*MCOS); MCOS->AddComment("Offset entry count"); MCOS->emitInt32(0); - RangesSymbol = context.createTempSymbol("debug_rnglist0_start", true, 
true); + RangesSymbol = context.createTempSymbol("debug_rnglist0_start"); MCOS->emitLabel(RangesSymbol); for (MCSection *Sec : Sections) { const MCSymbol *StartSymbol = Sec->getBeginSymbol(); @@ -1157,7 +1155,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) { MCOS->emitLabel(EndSymbol); } else { MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); - RangesSymbol = context.createTempSymbol("debug_ranges_start", true, true); + RangesSymbol = context.createTempSymbol("debug_ranges_start"); MCOS->emitLabel(RangesSymbol); for (MCSection *Sec : Sections) { const MCSymbol *StartSymbol = Sec->getBeginSymbol(); diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 2f464f4b58b6f..1c23d31f8744a 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -248,7 +248,7 @@ void MCObjectStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, } MCSymbol *MCObjectStreamer::emitCFILabel() { - MCSymbol *Label = getContext().createTempSymbol("cfi", true); + MCSymbol *Label = getContext().createTempSymbol("cfi"); emitLabel(Label); return Label; } diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 7c5834895e523..7997b237a7ebe 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -28,7 +28,7 @@ MCSection::MCSection(SectionVariant V, StringRef Name, SectionKind K, MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) { if (!End) - End = Ctx.createTempSymbol("sec_end", true); + End = Ctx.createTempSymbol("sec_end"); return End; } diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 71a89ad46703b..4b5ae3cc202de 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1076,7 +1076,7 @@ void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, } // Otherwise, emit with .set (aka assignment). - MCSymbol *SetLabel = Context.createTempSymbol("set", true); + MCSymbol *SetLabel = Context.createTempSymbol("set"); emitAssignment(SetLabel, Diff); emitSymbolValue(SetLabel, Size); } diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 4ab2ff12d3196..04e21867d5711 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1897,7 +1897,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { // LSJLJEH: Register SrcReg = MI->getOperand(0).getReg(); Register ValReg = MI->getOperand(1).getReg(); - MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", true, true); + MCSymbol *Label = OutContext.createTempSymbol("SJLJEH"); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ValReg) diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index a39489d353a26..a8853609a7c87 100644 --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -338,8 +338,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) { // Create the symbol. 
MCContext &Context = MF->getContext(); - MCSymbol *Symbol = - Context.createTempSymbol(Twine("pcrel"), true, false); + MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel"); MachineOperand PCRelLabel = MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); Pair->DefInst->addOperand(*MF, PCRelLabel); diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 3225559f6c4d5..557c528cfd03d 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2124,8 +2124,7 @@ void RISCVAsmParser::emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg, // OP DestReg, TmpReg, %pcrel_lo(TmpLabel) MCContext &Ctx = getContext(); - MCSymbol *TmpLabel = Ctx.createTempSymbol( - "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false); + MCSymbol *TmpLabel = Ctx.createNamedTempSymbol("pcrel_hi"); Out.emitLabel(TmpLabel); const RISCVMCExpr *SymbolHi = RISCVMCExpr::create(Symbol, VKHi, Ctx); From 9a8cab8bacc12d48d74249d868082effe132029e Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Mon, 21 Dec 2020 13:42:38 -0800 Subject: [PATCH 043/378] [mlir][sparse] adjust output tensor to synthetic tensor Fixes a merge conflict with previous two CLs. Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D93664 --- mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp index eb940d0f769bb..a6b7277e47e33 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp @@ -466,8 +466,8 @@ static unsigned buildLattices(Merger &merger, linalg::GenericOp op, // set to the undefined index in that dimension. An invariant expression // is set to a synthetic tensor with undefined indices only. unsigned s = merger.addSet(); - unsigned t = kind == Kind::kTensor ? merger.exp(exp).e0 - : op.getNumShapedOperands() - 1; + unsigned t = + kind == Kind::kTensor ? merger.exp(exp).e0 : op.getNumShapedOperands(); merger.set(s).push_back(merger.addLat(t, idx, exp)); return s; } From ed73a78924a8cf554fd4da7c41f78563ac128708 Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Fri, 18 Dec 2020 20:34:55 -0600 Subject: [PATCH 044/378] [RISCV] Define the vand, vor and vxor RVV intrinsics Define the `vand`, `vor` and `vxor` IR intrinsics for the respective V instructions. 
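For illustration, a minimal IR sketch of how one of the new intrinsics is invoked
(the function name @vand_sketch is hypothetical; the intrinsic signature and the
trailing i32 vector-length operand follow the RV32 tests added below):

; Unmasked vand on <vscale x 1 x i8> operands; the final i32 is the explicit VL.
declare <vscale x 1 x i8> @llvm.riscv.vand.nxv1i8.nxv1i8(
  <vscale x 1 x i8>, <vscale x 1 x i8>, i32)

define <vscale x 1 x i8> @vand_sketch(<vscale x 1 x i8> %x, <vscale x 1 x i8> %y, i32 %vl) {
entry:
  ; Expected to select to a single vand.vv under a vsetvli for e8,mf8 (see tests).
  %r = call <vscale x 1 x i8> @llvm.riscv.vand.nxv1i8.nxv1i8(
      <vscale x 1 x i8> %x, <vscale x 1 x i8> %y, i32 %vl)
  ret <vscale x 1 x i8> %r
}

On RV64 the VL operand is i64 instead of i32, matching the *-rv64.ll tests.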
Authored-by: Roger Ferrer Ibanez Co-Authored-by: Evandro Menezes Differential Revision: https://reviews.llvm.org/D93574 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 4 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 14 + llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll | 1945 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll | 2377 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll | 1945 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll | 2377 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll | 1945 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll | 2377 +++++++++++++++++ 8 files changed, 12984 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 055d6baa5b8d6..f65f8e6ab7796 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -396,6 +396,10 @@ let TargetPrefix = "riscv" in { defm vmsbc_borrow_in : RISCVBinaryMaskOutWithV0; defm vmsbc : RISCVBinaryMaskOut; + defm vand : RISCVBinaryAAX; + defm vor : RISCVBinaryAAX; + defm vxor : RISCVBinaryAAX; + defm vsll : RISCVBinaryAAX; defm vsrl : RISCVBinaryAAX; defm vsra : RISCVBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 050b17c440f57..0a2aad3bc2bd2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1598,6 +1598,13 @@ defm PseudoVSBC : VPseudoBinaryV_VM_XM; defm PseudoVMSBC : VPseudoBinaryM_VM_XM<"@earlyclobber $rd">; defm PseudoVMSBC : VPseudoBinaryM_V_X<"@earlyclobber $rd">; +//===----------------------------------------------------------------------===// +// 12.5. Vector Bitwise Logical Instructions +//===----------------------------------------------------------------------===// +defm PseudoVAND : VPseudoBinaryV_VV_VX_VI; +defm PseudoVOR : VPseudoBinaryV_VV_VX_VI; +defm PseudoVXOR : VPseudoBinaryV_VV_VX_VI; + //===----------------------------------------------------------------------===// // 12.6. Vector Single-Width Bit Shift Instructions //===----------------------------------------------------------------------===// @@ -1914,6 +1921,13 @@ defm "" : VPatBinaryV_VM_XM<"int_riscv_vsbc", "PseudoVSBC">; defm "" : VPatBinaryM_VM_XM<"int_riscv_vmsbc_borrow_in", "PseudoVMSBC">; defm "" : VPatBinaryM_V_X<"int_riscv_vmsbc", "PseudoVMSBC">; +//===----------------------------------------------------------------------===// +// 12.5. Vector Bitwise Logical Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vand", "PseudoVAND", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vor", "PseudoVOR", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vxor", "PseudoVXOR", AllIntegerVectors>; + //===----------------------------------------------------------------------===// // 12.6. 
Vector Single-Width Bit Shift Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll new file mode 100644 index 0000000000000..0dd0a556e2a85 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll @@ -0,0 +1,1945 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vand.nxv1i8.nxv1i8( + , + , + i32); + +define @intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i8.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i8.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i8.nxv2i8( + , + , + i32); + +define @intrinsic_vand_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i8.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i8.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i8.nxv4i8( + , + , + i32); + +define @intrinsic_vand_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i8.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i8.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i8.nxv8i8( + , + , + i32); + +define @intrinsic_vand_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i8.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i8.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vand_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i8.nxv16i8( + , + , + i32); + +define @intrinsic_vand_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i8.nxv16i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i8.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv32i8.nxv32i8( + , + , + i32); + +define @intrinsic_vand_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i8.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i8.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv64i8.nxv64i8( + , + , + i32); + +define @intrinsic_vand_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv64i8.nxv64i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv64i8.nxv64i8( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv64i8.nxv64i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i16.nxv1i16( + , + , + i32); + +define @intrinsic_vand_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i16.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i16.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + 
%a = call @llvm.riscv.vand.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i16.nxv2i16( + , + , + i32); + +define @intrinsic_vand_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i16.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i16.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i16.nxv4i16( + , + , + i32); + +define @intrinsic_vand_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i16.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i16.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i16.nxv8i16( + , + , + i32); + +define @intrinsic_vand_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i16.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i16.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i16.nxv16i16( + , + , + i32); + +define @intrinsic_vand_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i16.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i16.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare 
@llvm.riscv.vand.nxv32i16.nxv32i16( + , + , + i32); + +define @intrinsic_vand_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i16.nxv32i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i16.nxv32i16( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i16.nxv32i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i32.nxv1i32( + , + , + i32); + +define @intrinsic_vand_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i32.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i32.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i32.nxv2i32( + , + , + i32); + +define @intrinsic_vand_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i32.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i32.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i32.nxv4i32( + , + , + i32); + +define @intrinsic_vand_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i32.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i32.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i32.nxv8i32( + , + , + i32); + +define @intrinsic_vand_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i32.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i32.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i32.nxv16i32( + , + , + i32); + +define @intrinsic_vand_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i32.nxv16i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i32.nxv16i32( + , + , + , + , + i32); + +define @intrinsic_vand_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i32.nxv16i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vand_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vand_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vand_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vand_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vand_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i8.i8( + %0, + 
i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vand_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vand_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vand_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vand_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vand_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vand_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vand_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv64i8.i8( + , + i8, + i32); + +define @intrinsic_vand_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv64i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv64i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vand_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv64i8_nxv64i8_i8 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vand_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vand_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vand_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vand_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vand_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vand_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vand_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vand_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + 
ret %a +} + +declare @llvm.riscv.vand.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vand_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vand_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv32i16.i16( + , + i16, + i32); + +define @intrinsic_vand_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vand_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vand_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vand_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vand_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vand_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vand_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vand_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vand_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vand_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vand_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i32.i32( + , + i32, + i32); + +define @intrinsic_vand_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vand_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vand_vi_nxv1i8_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv2i8_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vi 
{{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv4i8_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv8i8_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv16i8_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv32i8_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv64i8_nxv64i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv64i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv64i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv1i16_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv1i16_nxv1i16_i16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv2i16_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv4i16_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv8i16_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv16i16_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv32i16_nxv32i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv32i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define 
@intrinsic_vand_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv1i32_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv2i32_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv4i32_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv8i32_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv16i32_nxv16i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv16i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e32,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll new file mode 100644 index 0000000000000..e6ae2578f5703 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll @@ -0,0 +1,2377 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vand.nxv1i8.nxv1i8( + , + , + i64); + +define @intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i8.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i8.nxv2i8( + , + , + i64); + +define @intrinsic_vand_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i8.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i8.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i8.nxv4i8( + , + , + i64); + +define @intrinsic_vand_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i8.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i8.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i8.nxv8i8( + , + , + i64); + +define @intrinsic_vand_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i8.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i8.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i8.nxv16i8( + , + , + i64); + +define @intrinsic_vand_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i8.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i8.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv32i8.nxv32i8( + , + , + i64); + +define @intrinsic_vand_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i8.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i8.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv64i8.nxv64i8( + , + , + i64); + +define @intrinsic_vand_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv64i8.nxv64i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv64i8.nxv64i8( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv64i8.nxv64i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i16.nxv1i16( + , + , + i64); + +define @intrinsic_vand_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i16.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i16.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, 
{{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i16.nxv2i16( + , + , + i64); + +define @intrinsic_vand_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i16.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i16.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i16.nxv4i16( + , + , + i64); + +define @intrinsic_vand_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i16.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i16.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i16.nxv8i16( + , + , + i64); + +define @intrinsic_vand_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i16.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i16.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i16.nxv16i16( + , + , + i64); + +define @intrinsic_vand_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i16.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i16.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + 
+declare @llvm.riscv.vand.nxv32i16.nxv32i16( + , + , + i64); + +define @intrinsic_vand_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i16.nxv32i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i16.nxv32i16( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i16.nxv32i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i32.nxv1i32( + , + , + i64); + +define @intrinsic_vand_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i32.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i32.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i32.nxv2i32( + , + , + i64); + +define @intrinsic_vand_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i32.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i32.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i32.nxv4i32( + , + , + i64); + +define @intrinsic_vand_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i32.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i32.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i32.nxv8i32( + , + , + i64); + +define @intrinsic_vand_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i64 %2) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i32.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i32.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i32.nxv16i32( + , + , + i64); + +define @intrinsic_vand_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i32.nxv16i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i32.nxv16i32( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i32.nxv16i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i64.nxv1i64( + , + , + i64); + +define @intrinsic_vand_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i64.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i64.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i64.nxv2i64( + , + , + i64); + +define @intrinsic_vand_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i64.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i64.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i64.nxv4i64( + , + , + i64); + +define @intrinsic_vand_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; 
CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i64.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i64.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i64.nxv8i64( + , + , + i64); + +define @intrinsic_vand_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vv_nxv8i64_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i64.nxv8i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i64.nxv8i64( + , + , + , + , + i64); + +define @intrinsic_vand_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vv_nxv8i64_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vand.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i64.nxv8i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vand_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vand_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vand_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vand_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vand_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define 
@intrinsic_vand_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vand_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vand_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vand_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vand_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vand_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vand_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv64i8.i8( + , + i8, + i64); + +define @intrinsic_vand_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv64i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv64i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vand_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vand.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vand_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vand_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vand_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vand_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vand_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vand_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vand_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vand_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i16.i16( + , + i16, + i64); + +define 
@intrinsic_vand_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vand_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv32i16.i16( + , + i16, + i64); + +define @intrinsic_vand_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv32i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv32i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vand_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vand_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vand_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vand_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vand_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vand_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vand_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vand_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vand_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv16i32.i32( + , + i32, + i64); + +define @intrinsic_vand_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv16i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv16i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vand_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vand_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vand_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare 
@llvm.riscv.vand.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vand_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vand.nxv8i64.i64( + , + i64, + i64); + +define @intrinsic_vand_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vx_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vand.nxv8i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vand.mask.nxv8i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vand.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +define @intrinsic_vand_vi_nxv1i8_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv2i8_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv4i8_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv4i8_nxv4i8_i8 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv8i8_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv16i8_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv32i8_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv64i8_nxv64i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv64i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv64i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv1i16_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: 
+; CHECK-LABEL: intrinsic_vand_mask_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv2i16_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv4i16_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv8i16_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv16i16_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv32i16_nxv32i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv32i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call 
@llvm.riscv.vand.mask.nxv32i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv1i32_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv2i32_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv4i32_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv8i32_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv16i32_nxv16i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv16i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv16i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv1i64_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vand_vi_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv2i64_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv4i64_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vand_vi_nxv8i64_nxv8i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_vi_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vand.nxv8i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vand_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vand_mask_vi_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vand.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vand.mask.nxv8i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll new file mode 100644 index 0000000000000..1ac44ba020ea1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll @@ -0,0 +1,1945 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vor.nxv1i8.nxv1i8( + , + , + i32); + +define @intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i8.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i8.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i8.nxv2i8( + , + , + i32); + +define @intrinsic_vor_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i8.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i8.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i8.nxv4i8( + , + , + i32); + +define @intrinsic_vor_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i8.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i8.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i8.nxv8i8( + , + , + i32); + +define @intrinsic_vor_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i8.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i8.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i8.nxv16i8( + , + , + i32); + +define @intrinsic_vor_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i8.nxv16i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i8.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i8.nxv32i8( + , + , + i32); 
+ +define @intrinsic_vor_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i8.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i8.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv64i8.nxv64i8( + , + , + i32); + +define @intrinsic_vor_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv64i8.nxv64i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv64i8.nxv64i8( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv64i8.nxv64i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i16.nxv1i16( + , + , + i32); + +define @intrinsic_vor_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i16.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i16.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i16.nxv2i16( + , + , + i32); + +define @intrinsic_vor_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i16.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i16.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i16.nxv4i16( + , + , + i32); + +define @intrinsic_vor_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e16,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i16.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i16.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i16.nxv8i16( + , + , + i32); + +define @intrinsic_vor_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i16.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i16.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i16.nxv16i16( + , + , + i32); + +define @intrinsic_vor_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i16.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i16.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i16.nxv32i16( + , + , + i32); + +define @intrinsic_vor_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i16.nxv32i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i16.nxv32i16( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i16.nxv32i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i32.nxv1i32( + , + , + i32); + +define @intrinsic_vor_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i32.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + 
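+
+; A sketch of the masked vector-vector form under the same assumption about
+; the stripped types: the first operand appears to be the merge (maskedoff)
+; vector, the fourth a <vscale x 1 x i1> mask, and the trailing i32 the
+; vector length, so the masked nxv1i32 call presumably reads:
+;
+;   %a = call <vscale x 1 x i32> @llvm.riscv.vor.mask.nxv1i32.nxv1i32(
+;     <vscale x 1 x i32> %0,
+;     <vscale x 1 x i32> %1,
+;     <vscale x 1 x i32> %2,
+;     <vscale x 1 x i1> %3,
+;     i32 %4)
+;
+; and is expected to select vor.vv with a trailing v0.t mask operand after a
+; vsetvli of e32,mf2,ta,mu.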
+declare @llvm.riscv.vor.mask.nxv1i32.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i32.nxv2i32( + , + , + i32); + +define @intrinsic_vor_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i32.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i32.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i32.nxv4i32( + , + , + i32); + +define @intrinsic_vor_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i32.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i32.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i32.nxv8i32( + , + , + i32); + +define @intrinsic_vor_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i32.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i32.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i32.nxv16i32( + , + , + i32); + +define @intrinsic_vor_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i32.nxv16i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i32.nxv16i32( + , + , + , + , + i32); + +define @intrinsic_vor_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vor_mask_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i32.nxv16i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vor_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vor_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vor_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vor_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vor_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vor_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vor_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vor_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i8.i8( + , + i8, + i32); + 
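+
+; A sketch of the vector-scalar (vx) masked form, again assuming the elided
+; types follow the name mangling; the scalar i8 operand is passed in a GPR,
+; which is why these tests check vor.vx with an {{a[0-9]+}} operand instead
+; of a third vector register:
+;
+;   %a = call <vscale x 1 x i8> @llvm.riscv.vor.mask.nxv1i8.i8(
+;     <vscale x 1 x i8> %0,
+;     <vscale x 1 x i8> %1,
+;     i8 %2,
+;     <vscale x 1 x i1> %3,
+;     i32 %4)
+;
+; expected to select vor.vx with v0.t after a vsetvli of e8,mf8,ta,mu.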
+define @intrinsic_vor_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vor_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vor_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vor_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv64i8.i8( + , + i8, + i32); + +define @intrinsic_vor_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv64i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv64i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vor_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vor_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vor_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vor_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call 
@llvm.riscv.vor.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vor_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vor_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vor_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vor_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vor_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vor_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vor_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i16.i16( + , + i16, + i32); + +define @intrinsic_vor_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vor_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, 
%3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vor_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vor_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vor_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vor_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vor_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vor_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vor_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vor_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vor.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i32.i32( + , + i32, + i32); + +define @intrinsic_vor_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vor_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vor_vi_nxv1i8_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv2i8_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv4i8_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv8i8_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv16i8_nxv16i8_i8( %0, i32 
%1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv32i8_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv64i8_nxv64i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv64i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv64i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv1i16_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv2i16_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv4i16_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define 
@intrinsic_vor_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv8i16_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv16i16_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv32i16_nxv32i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv32i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv1i32_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv2i32_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, 
{{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv4i32_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv8i32_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv16i32_nxv16i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv16i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll new file mode 100644 index 0000000000000..ce2107138f93a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll @@ -0,0 +1,2377 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vor.nxv1i8.nxv1i8( + , + , + i64); + +define @intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i8.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i8.nxv2i8( + , + , + i64); + +define @intrinsic_vor_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vv 
{{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i8.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i8.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i8.nxv4i8( + , + , + i64); + +define @intrinsic_vor_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i8.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i8.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i8.nxv8i8( + , + , + i64); + +define @intrinsic_vor_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i8.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i8.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i8.nxv16i8( + , + , + i64); + +define @intrinsic_vor_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i8.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i8.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i8.nxv32i8( + , + , + i64); + +define @intrinsic_vor_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i8.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i8.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv32i8_nxv32i8_nxv32i8( 
%0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv64i8.nxv64i8( + , + , + i64); + +define @intrinsic_vor_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv64i8.nxv64i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv64i8.nxv64i8( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv64i8.nxv64i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i16.nxv1i16( + , + , + i64); + +define @intrinsic_vor_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i16.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i16.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i16.nxv2i16( + , + , + i64); + +define @intrinsic_vor_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i16.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i16.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i16.nxv4i16( + , + , + i64); + +define @intrinsic_vor_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i16.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i16.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, 
{{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i16.nxv8i16( + , + , + i64); + +define @intrinsic_vor_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i16.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i16.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i16.nxv16i16( + , + , + i64); + +define @intrinsic_vor_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i16.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i16.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i16.nxv32i16( + , + , + i64); + +define @intrinsic_vor_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i16.nxv32i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i16.nxv32i16( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i16.nxv32i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i32.nxv1i32( + , + , + i64); + +define @intrinsic_vor_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i32.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i32.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vor.nxv2i32.nxv2i32( + , + , + i64); + +define @intrinsic_vor_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i32.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i32.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i32.nxv4i32( + , + , + i64); + +define @intrinsic_vor_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i32.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i32.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i32.nxv8i32( + , + , + i64); + +define @intrinsic_vor_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i32.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i32.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i32.nxv16i32( + , + , + i64); + +define @intrinsic_vor_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i32.nxv16i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i32.nxv16i32( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i32.nxv16i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i64.nxv1i64( + , + , + i64); + +define @intrinsic_vor_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vor_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i64.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i64.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i64.nxv2i64( + , + , + i64); + +define @intrinsic_vor_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i64.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i64.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i64.nxv4i64( + , + , + i64); + +define @intrinsic_vor_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i64.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i64.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i64.nxv8i64( + , + , + i64); + +define @intrinsic_vor_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vv_nxv8i64_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i64.nxv8i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i64.nxv8i64( + , + , + , + , + i64); + +define @intrinsic_vor_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i64_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i64.nxv8i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vor_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i8.i8( + %0, + i8 %1, + i64 
%2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vor_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vor_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vor_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vor_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vor_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vor_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vor_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vor_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vor_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, 
{{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vor_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vor_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv64i8.i8( + , + i8, + i64); + +define @intrinsic_vor_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv64i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv64i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vor_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vor_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vor_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vor_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vor_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vor_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vor_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vor_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vor_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vor_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vor_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv32i16.i16( + , + i16, + i64); + +define @intrinsic_vor_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv32i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv32i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vor_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vor_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call 
@llvm.riscv.vor.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vor_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vor_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vor_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vor_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vor_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vor_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vor_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv16i32.i32( + , + i32, + i64); + +define @intrinsic_vor_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv16i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv16i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vor_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vor_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vor_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vor_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vor.nxv8i64.i64( + , + i64, + i64); + +define @intrinsic_vor_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vx_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vor.nxv8i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vor.mask.nxv8i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vor.mask.nxv8i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +define @intrinsic_vor_vi_nxv1i8_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv2i8_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv4i8_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv8i8_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv16i8_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv32i8_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call 
@llvm.riscv.vor.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv64i8_nxv64i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv64i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv64i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv1i16_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv2i16_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv4i16_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv8i16_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e16,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv16i16_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv32i16_nxv32i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv32i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv32i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv1i32_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv2i32_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv4i32_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv8i32_nxv8i32_i32( %0, i64 %1) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv16i32_nxv16i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv16i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv16i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv1i64_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv2i64_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv4i64_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vor_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_mask_vi_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vor.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vor_vi_nxv8i64_nxv8i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vor_vi_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vor.nxv8i64.i64( + %0, + i64 9, + i64 %1) + + 
+  ret <vscale x 8 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vor_mask_vi_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vor_mask_vi_nxv8i64_nxv8i64_i64
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu
+; CHECK: vor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t
+  %a = call <vscale x 8 x i64> @llvm.riscv.vor.mask.nxv8i64.i64(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x i64> %1,
+    i64 9,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
new file mode 100644
index 0000000000000..ee631bd6623bc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
@@ -0,0 +1,1945 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  i32);
+
+define <vscale x 1 x i8> @intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    i32 %2)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vxor.mask.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i8> @intrinsic_vxor_mask_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv1i8_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 1 x i8> @llvm.riscv.vxor.mask.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i8> %2,
+    <vscale x 1 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vxor.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  i32);
+
+define <vscale x 2 x i8> @intrinsic_vxor_vv_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vxor_vv_nxv2i8_nxv2i8_nxv2i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu
+; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 2 x i8> @llvm.riscv.vxor.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> %0,
+    <vscale x 2 x i8> %1,
+    i32 %2)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vxor.mask.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i8> @intrinsic_vxor_mask_vv_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv2i8_nxv2i8_nxv2i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu
+; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 2 x i8> @llvm.riscv.vxor.mask.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> %0,
+    <vscale x 2 x i8> %1,
+    <vscale x 2 x i8> %2,
+    <vscale x 2 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vxor.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  i32);
+
+define <vscale x 4 x i8> @intrinsic_vxor_vv_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vxor_vv_nxv4i8_nxv4i8_nxv4i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu
+; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 4 x i8> @llvm.riscv.vxor.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> %0,
+    <vscale x 4 x i8> %1,
+    i32 %2)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vxor.mask.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i8> @intrinsic_vxor_mask_vv_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv4i8_nxv4i8_nxv4i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu
+; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 4 x i8> @llvm.riscv.vxor.mask.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i8> %2,
+    <vscale x 4 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vxor.nxv8i8.nxv8i8(
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  i32);
+
+define <vscale x 8 x i8> @intrinsic_vxor_vv_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vxor_vv_nxv8i8_nxv8i8_nxv8i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu
+; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 8 x i8>
@llvm.riscv.vxor.nxv8i8.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i8.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i8.nxv16i8( + , + , + i32); + +define @intrinsic_vxor_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i8.nxv16i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i8.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i8.nxv32i8( + , + , + i32); + +define @intrinsic_vxor_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i8.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i8.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv64i8.nxv64i8( + , + , + i32); + +define @intrinsic_vxor_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv64i8.nxv64i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv64i8.nxv64i8( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv64i8.nxv64i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i16.nxv1i16( + , + , + i32); + +define @intrinsic_vxor_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i16.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i16.nxv1i16( + , + , + , + , + i32); + +define 
@intrinsic_vxor_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i16.nxv2i16( + , + , + i32); + +define @intrinsic_vxor_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i16.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i16.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i16.nxv4i16( + , + , + i32); + +define @intrinsic_vxor_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i16.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i16.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i16.nxv8i16( + , + , + i32); + +define @intrinsic_vxor_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i16.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i16.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i16.nxv16i16( + , + , + i32); + +define @intrinsic_vxor_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i16.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i16.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vxor_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i16.nxv32i16( + , + , + i32); + +define @intrinsic_vxor_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i16.nxv32i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i16.nxv32i16( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i16.nxv32i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i32.nxv1i32( + , + , + i32); + +define @intrinsic_vxor_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i32.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i32.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i32.nxv2i32( + , + , + i32); + +define @intrinsic_vxor_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i32.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i32.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i32.nxv4i32( + , + , + i32); + +define @intrinsic_vxor_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i32.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i32.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vv 
{{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i32.nxv8i32( + , + , + i32); + +define @intrinsic_vxor_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i32.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i32.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i32.nxv16i32( + , + , + i32); + +define @intrinsic_vxor_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i32.nxv16i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i32.nxv16i32( + , + , + , + , + i32); + +define @intrinsic_vxor_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i32.nxv16i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vxor_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vxor_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i8.i8( + , + i8, + i32); + 
+define @intrinsic_vxor_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vxor_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vxor_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vxor_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv64i8.i8( + , + i8, + i32); + +define @intrinsic_vxor_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call 
@llvm.riscv.vxor.nxv64i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv64i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vxor_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vxor_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vxor_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vxor_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv8i16_nxv8i16_i16( 
%0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vxor_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i16.i16( + , + i16, + i32); + +define @intrinsic_vxor_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vxor_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vxor_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: 
vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vxor_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vxor_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i32.i32( + , + i32, + i32); + +define @intrinsic_vxor_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vxor_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vxor_vi_nxv1i8_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv2i8_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, 
{{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv4i8_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv8i8_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv16i8_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv32i8_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv64i8_nxv64i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv64i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv64i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv1i16_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv2i16_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv4i16_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv8i16_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv16i16_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define 
@intrinsic_vxor_vi_nxv32i16_nxv32i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv32i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv1i32_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv2i32_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv4i32_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv8i32_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv16i32_nxv16i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: 
vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv16i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll new file mode 100644 index 0000000000000..5395e42429d0b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll @@ -0,0 +1,2377 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vxor.nxv1i8.nxv1i8( + , + , + i64); + +define @intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i8.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i8.nxv2i8( + , + , + i64); + +define @intrinsic_vxor_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i8.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i8.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i8.nxv4i8( + , + , + i64); + +define @intrinsic_vxor_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i8.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i8.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i8.nxv8i8( + , + , + i64); + +define @intrinsic_vxor_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i8.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i8.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i8.nxv16i8( + , + , + i64); + +define @intrinsic_vxor_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i8.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i8.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i8.nxv32i8( + , + , + i64); + +define @intrinsic_vxor_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i8.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i8.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv64i8.nxv64i8( + , + , + i64); + +define @intrinsic_vxor_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv64i8.nxv64i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv64i8.nxv64i8( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv64i8.nxv64i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i16.nxv1i16( + , + , + i64); + +define @intrinsic_vxor_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i16.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + 
+declare @llvm.riscv.vxor.mask.nxv1i16.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i16.nxv2i16( + , + , + i64); + +define @intrinsic_vxor_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i16.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i16.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i16.nxv4i16( + , + , + i64); + +define @intrinsic_vxor_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i16.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i16.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i16.nxv8i16( + , + , + i64); + +define @intrinsic_vxor_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i16.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i16.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i16.nxv16i16( + , + , + i64); + +define @intrinsic_vxor_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i16.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i16.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, 
%1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i16.nxv32i16( + , + , + i64); + +define @intrinsic_vxor_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i16.nxv32i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i16.nxv32i16( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i16.nxv32i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i32.nxv1i32( + , + , + i64); + +define @intrinsic_vxor_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i32.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i32.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i32.nxv2i32( + , + , + i64); + +define @intrinsic_vxor_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i32.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i32.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i32.nxv4i32( + , + , + i64); + +define @intrinsic_vxor_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i32.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i32.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i32.nxv8i32( + , + , + i64); + +define @intrinsic_vxor_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i32.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i32.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i32.nxv16i32( + , + , + i64); + +define @intrinsic_vxor_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i32.nxv16i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i32.nxv16i32( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i32.nxv16i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i64.nxv1i64( + , + , + i64); + +define @intrinsic_vxor_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i64.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i64.nxv2i64( + , + , + i64); + +define @intrinsic_vxor_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i64.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i64.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vxor.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i64.nxv4i64( + , + , + i64); + +define @intrinsic_vxor_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i64.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i64.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i64.nxv8i64( + , + , + i64); + +define @intrinsic_vxor_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vv_nxv8i64_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i64.nxv8i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i64.nxv8i64( + , + , + , + , + i64); + +define @intrinsic_vxor_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i64_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vxor.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i64.nxv8i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vxor_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vxor_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vxor_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vxor_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vxor_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vxor_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv64i8.i8( + , + i8, + i64); + +define @intrinsic_vxor_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv64i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare 
@llvm.riscv.vxor.mask.nxv64i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vxor_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vxor_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vxor_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vxor_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vxor_mask_vx_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vxor_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv32i16.i16( + , + i16, + i64); + +define @intrinsic_vxor_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv32i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv32i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vxor_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vxor_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vxor.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vxor_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vxor_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv16i32.i32( + , + i32, + i64); + +define @intrinsic_vxor_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv16i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv16i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vxor_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv2i64.i64( + , + i64, + i64); + +define 
@intrinsic_vxor_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vxor_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vxor.nxv8i64.i64( + , + i64, + i64); + +define @intrinsic_vxor_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vx_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vxor.nxv8i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vxor.mask.nxv8i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vxor.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +define @intrinsic_vxor_vi_nxv1i8_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv2i8_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv2i8_nxv2i8_i8 
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv4i8_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv8i8_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv16i8_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv32i8_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv64i8_nxv64i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv64i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv64i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv1i16_nxv1i16_i16( %0, i64 %1) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv2i16_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv4i16_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv8i16_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv16i16_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv32i16_nxv32i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call 
@llvm.riscv.vxor.nxv32i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv32i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv1i32_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv2i32_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv4i32_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv8i32_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv16i32_nxv16i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv16i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vxor_mask_vi_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv16i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv1i64_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv2i64_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv4i64_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vxor_vi_nxv8i64_nxv8i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_vi_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vxor.nxv8i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vxor_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vxor_mask_vi_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vxor.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vxor.mask.nxv8i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} From 0935b0c8695dcc203918d417b27642cb95d1cb8f Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 21 Dec 2020 14:39:19 -0800 Subject: [PATCH 045/378] [NFC] Remove unused function --- llvm/include/llvm/Analysis/LazyCallGraph.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index f356aec9e0a17..7478e17263669 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -258,7 +258,6 @@ class LazyCallGraph { iterator begin() { return iterator(Edges.begin(), Edges.end()); } iterator end() 
{ return iterator(Edges.end(), Edges.end()); } - Edge &operator[](int i) { return Edges[i]; } Edge &operator[](Node &N) { assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!"); auto &E = Edges[EdgeIndexMap.find(&N)->second]; From be961374611a4be1b042cce7e6cc4cd12a1b4fd7 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 22 Dec 2020 01:44:31 +0300 Subject: [PATCH 046/378] [MLIR][SPIRVToLLVM] Updated documentation on spirv-cpu-runner This patch adds documentation for the `mlir-spirv-cpu-runner`. It provides an overview of applied transformations and passes, as well as an example walk-through. Some typos in the documentation have been fixed as well. Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D93620 --- mlir/docs/SPIRVToLLVMDialectConversion.md | 137 +++++++++++++++++++++- 1 file changed, 135 insertions(+), 2 deletions(-) diff --git a/mlir/docs/SPIRVToLLVMDialectConversion.md b/mlir/docs/SPIRVToLLVMDialectConversion.md index 3aa4d0fa43a19..bdae08c1e230f 100644 --- a/mlir/docs/SPIRVToLLVMDialectConversion.md +++ b/mlir/docs/SPIRVToLLVMDialectConversion.md @@ -377,7 +377,7 @@ entry points in LLVM. At the moment, we use the following approach: entry point. For example, `LocalSize` provides information about the work-group size that can be reused. - In order to preserve this inforamtion, `spv.ExecutionMode` is converted to + In order to preserve this information, `spv.ExecutionMode` is converted to a struct global variable that stores the execution mode id and any variables associated with it. In C, the struct has the structure shown below. @@ -816,7 +816,140 @@ to LLVM ops. At the moment, SPIR-V module attributes are ignored. ## `mlir-spirv-cpu-runner` -**Note: this is a section in progress, more information will appear soon** +`mlir-spirv-cpu-runner` allows to execute `gpu` dialect kernel on the CPU via +SPIR-V to LLVM dialect conversion. Currently, only single-threaded kernel is +supported. + +To build the runner, add the following option to `cmake`: +```bash +-DMLIR_SPIRV_CPU_RUNNER_ENABLED=1 +``` + +### Pipeline + +The `gpu` module with the kernel and the host code undergo the following +transformations: + +* Convert the `gpu` module into SPIR-V dialect, lower ABI attributes and + update version, capability and extension. + +* Emulate the kernel call by converting the launching operation into a normal + function call. The data from the host side to the device is passed via + copying to global variables. These are created in both the host and the + kernel code and later linked when nested modules are folded. + +* Convert SPIR-V dialect kernel to LLVM dialect via the new conversion path. + +After these passes, the IR transforms into a nested LLVM module - a main module +representing the host code and a kernel module. These modules are linked and +executed using `ExecutionEngine`. + +### Walk-through + +This section gives a detailed overview of the IR changes while running +`mlir-spirv-cpu-runner`. First, consider that we have the following IR. (For +simplicity some type annotations and function implementations have been +omitted). + +```mlir +gpu.module @foo { + gpu.func @bar(%arg: memref<8xi32>) { + // Kernel code. + gpu.return + } +} + +func @main() { + // Fill the buffer with some data + %buffer = alloc : memref<8xi32> + %data = ... 
+ call fillBuffer(%buffer, %data) + + "gpu.launch_func"(/*grid dimensions*/, %buffer) { + kernel = @foo::bar + } +} +``` + +Lowering `gpu` dialect to SPIR-V dialect results in + +```mlir +spv.module @__spv__foo /*VCE triple and other metadata here*/ { + spv.globalVariable @__spv__foo_arg bind(0,0) : ... + spv.func @bar() { + // Kernel code. + } + spv.EntryPoint @bar, ... +} + +func @main() { + // Fill the buffer with some data. + %buffer = alloc : memref<8xi32> + %data = ... + call fillBuffer(%buffer, %data) + + "gpu.launch_func"(/*grid dimensions*/, %buffer) { + kernel = @foo::bar + } +} +``` + +Then, the lowering from standard dialect to LLVM dialect is applied to the host +code. + +```mlir +spv.module @__spv__foo /*VCE triple and other metadata here*/ { + spv.globalVariable @__spv__foo_arg bind(0,0) : ... + spv.func @bar() { + // Kernel code. + } + spv.EntryPoint @bar, ... +} + +// Kernel function declaration. +llvm.func @__spv__foo_bar() : ... + +llvm.func @main() { + // Fill the buffer with some data. + llvm.call fillBuffer(%buffer, %data) + + // Copy data to the global variable, call kernel, and copy the data back. + %addr = llvm.mlir.addressof @__spv__foo_arg_descriptor_set0_binding0 : ... + "llvm.intr.memcpy"(%addr, %buffer) : ... + llvm.call @__spv__foo_bar() + "llvm.intr.memcpy"(%buffer, %addr) : ... + + llvm.return +} +``` + +Finally, SPIR-V module is converted to LLVM and the symbol names are resolved +for the linkage. + +```mlir +module @__spv__foo { + llvm.mlir.global @__spv__foo_arg_descriptor_set0_binding0 : ... + llvm.func @__spv__foo_bar() { + // Kernel code. + } +} + +// Kernel function declaration. +llvm.func @__spv__foo_bar() : ... + +llvm.func @main() { + // Fill the buffer with some data. + llvm.call fillBuffer(%buffer, %data) + + // Copy data to the global variable, call kernel, and copy the data back. + %addr = llvm.mlir.addressof @__spv__foo_arg_descriptor_set0_binding0 : ... + "llvm.intr.memcpy"(%addr, %buffer) : ... + llvm.call @__spv__foo_bar() + "llvm.intr.memcpy"(%buffer, %addr) : ... + + llvm.return +} +``` [LLVMFunctionAttributes]: https://llvm.org/docs/LangRef.html#function-attributes [SPIRVFunctionAttributes]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_function_control_a_function_control From 4ad0cfd4de414f9bedf48ec1034e663fe59efee4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 21 Dec 2020 13:29:56 -0800 Subject: [PATCH 047/378] llvm-profgen: Parse command line arguments after initializing targets I am experimenting with turning backends into loadable modules and in that scenario, target specific command line arguments won't be available until after the targets are initialized. Also, most other tools initialize targets before parsing arguments. Reviewed By: wlei Differential Revision: https://reviews.llvm.org/D93348 --- llvm/tools/llvm-profgen/llvm-profgen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp index 665ee7c791a7a..0f4d8f015439b 100644 --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -35,13 +35,13 @@ using namespace sampleprof; int main(int argc, const char *argv[]) { InitLLVM X(argc, argv); - cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n"); - // Initialize targets and assembly printers/parsers. 
InitializeAllTargetInfos(); InitializeAllTargetMCs(); InitializeAllDisassemblers(); + cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n"); + // Load binaries and parse perf events and samples PerfReader Reader(BinaryFilenames); Reader.parsePerfTraces(PerfTraceFilenames); From 7f40bb3b044fa673772f4d68351f7bd7c38294d4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 21 Dec 2020 15:16:09 -0800 Subject: [PATCH 048/378] HowToReleaseLLVM: Update document to match the current release process Change Summary: * Clarify that release manager can commit without code owner approval (but are still highly encouraged to get approval). * Clarify that there is no official release criteria. * Document what types of changes are allowed in each release phase. This is update is based on the RFC submitted here: http://lists.llvm.org/pipermail/llvm-dev/2020-May/141730.html Reviewed By: hans Differential Revision: https://reviews.llvm.org/D93493 --- llvm/docs/HowToReleaseLLVM.rst | 67 +++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/llvm/docs/HowToReleaseLLVM.rst b/llvm/docs/HowToReleaseLLVM.rst index b0308a1841946..2fce4777175e0 100644 --- a/llvm/docs/HowToReleaseLLVM.rst +++ b/llvm/docs/HowToReleaseLLVM.rst @@ -50,9 +50,16 @@ The release process is roughly as follows: * Finally, release! -The release process will be accelerated for dot releases. If the first round -of testing finds no critical bugs and no regressions since the last major release, -then additional rounds of testing will not be required. +* Announce bug fix release schedule to the LLVM community and update the website. + +* Tag bug fix -rc1 after 4 weeks have passed. + +* Tag bug fix -rc2 4 weeks after -rc1. + +* Tag additional -rc candidates, if needed, to fix critical issues in + previous -rc releases. + +* Tag final release. Release Process =============== @@ -119,7 +126,7 @@ Tag release candidates: $ git tag -a llvmorg-X.Y.Z-rcN -The Release Manager may supply pre-packaged source tarballs for users. This can +The Release Manager must supply pre-packaged source tarballs for users. This can be done with the export.sh script in utils/release. Tarballs, release binaries, or any other release artifacts must be uploaded to @@ -153,23 +160,16 @@ The minimum required version of the tools you'll need are :doc:`here Date: Mon, 21 Dec 2020 15:32:35 -0800 Subject: [PATCH 049/378] [Driver] Default Generic_GCC ppc/ppc64/ppc64le to -fasynchronous-unwind-tables GCC made the switch on 2018-04-10 ("rs6000: Enable -fasynchronous-unwind-tables by default"). In Clang, FreeBSD/NetBSD powerpc have already defaulted to -fasynchronous-unwind-tables. This patch defaults Generic_GCC powerpc (which affects Linux) to use -fasynchronous-unwind-tables. Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D92054 --- clang/docs/ReleaseNotes.rst | 4 ++++ clang/lib/Driver/ToolChains/Gnu.cpp | 3 +++ clang/test/Driver/ppc-features.cpp | 12 +++++++----- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a3038aa03cded..dd4de2d2015f8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -118,6 +118,10 @@ Modified Compiler Flags `-fno-delete-null-pointer-checks` has gained the power to remove the `nonnull` attribute on `this` for configurations that need it to be nullable. - ``-gsplit-dwarf`` no longer implies ``-g2``. 
+- ``-fasynchronous-unwind-tables`` is now the default on Linux AArch64/PowerPC. + This behavior matches newer GCC. + (`D91760 `_) + (`D92054 `_) Removed Compiler Flags ------------------------- diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 08158ba4bae8c..9da6d8e355941 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2674,6 +2674,9 @@ void Generic_GCC::printVerboseInfo(raw_ostream &OS) const { bool Generic_GCC::IsUnwindTablesDefault(const ArgList &Args) const { switch (getArch()) { case llvm::Triple::aarch64: + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: case llvm::Triple::x86_64: return true; default: diff --git a/clang/test/Driver/ppc-features.cpp b/clang/test/Driver/ppc-features.cpp index 91ec459ce1816..fceda63f5a231 100644 --- a/clang/test/Driver/ppc-features.cpp +++ b/clang/test/Driver/ppc-features.cpp @@ -1,6 +1,7 @@ /// Check default CC1 and linker options for ppc32. // RUN: %clang -### -target powerpc-unknown-linux-gnu %s 2>&1 | FileCheck --check-prefix=PPC32 %s -// PPC32: "-mfloat-abi" "hard" +// PPC32: "-munwind-tables" +// PPC32-SAME: "-mfloat-abi" "hard" // PPC32: "-m" "elf32ppclinux" @@ -38,11 +39,12 @@ /// Check default CC1 and linker options for ppc64. -// RUN: %clang -### -target powerpc64le-unknown-linux-gnu %s 2>&1 | FileCheck --check-prefix=PPC64 %s -// RUN: %clang -### -target powerpc64-unknown-linux-gnu %s 2>&1 | FileCheck -check-prefix=PPC64BE %s -// PPC64: "-mfloat-abi" "hard" +// RUN: %clang -### -target powerpc64le-unknown-linux-gnu %s 2>&1 | FileCheck --check-prefixes=PPC64,PPC64LE %s +// RUN: %clang -### -target powerpc64-unknown-linux-gnu %s 2>&1 | FileCheck --check-prefixes=PPC64,PPC64BE %s +// PPC64: "-munwind-tables" +// PPC64-SAME: "-mfloat-abi" "hard" -// PPC64: "-m" "elf64lppc" +// PPC64LE: "-m" "elf64lppc" // PPC64BE: "-m" "elf64ppc" // check -msoft-float option for ppc64 From c60a58f8d4354ca1a6915045774bf98cfada8ef4 Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Mon, 21 Dec 2020 13:33:58 -0500 Subject: [PATCH 050/378] [InstCombine] Add check of i1 types in select-to-zext/sext transformation When doing select-to-zext/sext transformations, we should not handle TrueVal and FalseVal of i1 type otherwise it would result in zext/sext i1 to i1. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D93272 --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index e05fa4ffa4032..fe21f300a4177 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2606,7 +2606,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0> // because that may need 3 instructions to splat the condition value: // extend, insertelement, shufflevector. - if (SelType->isIntOrIntVectorTy() && + // + // Do not handle i1 TrueVal and FalseVal otherwise would result in + // zext/sext i1 to i1. 
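+ // For example, "select i1 %c, i1 true, i1 false" must not become
+ // "zext i1 %c to i1": zext/sext require a strictly wider destination
+ // type, so the !isIntOrIntVectorTy(1) check below skips i1-typed selects.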
+ if (SelType->isIntOrIntVectorTy() && !SelType->isIntOrIntVectorTy(1) && CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { // select C, 1, 0 -> zext C to int if (match(TrueVal, m_One()) && match(FalseVal, m_Zero())) From 83274a0773f6a20abdc848b448009e0195c42166 Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Mon, 21 Dec 2020 22:57:34 +0100 Subject: [PATCH 051/378] [mlir] Add SmallVector sizes This is a temporary fix until figuring out how to correct the forward declare in mlir/include/mlir/Support/LLVM.h Differential Revision: https://reviews.llvm.org/D93666 --- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 7b1300da1783f..09c662c74477d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -198,8 +198,8 @@ void SwitchOp::build(OpBuilder &builder, OperationState &result, Value value, ArrayRef caseValues, BlockRange caseDestinations, ArrayRef caseOperands, ArrayRef branchWeights) { - SmallVector flattenedCaseOperands; - SmallVector caseOperandOffsets; + SmallVector flattenedCaseOperands; + SmallVector caseOperandOffsets; int32_t offset = 0; for (ValueRange operands : caseOperands) { flattenedCaseOperands.append(operands.begin(), operands.end()); @@ -230,8 +230,8 @@ parseSwitchOpCases(OpAsmParser &parser, ElementsAttr &caseValues, SmallVectorImpl &caseOperands, SmallVectorImpl &caseOperandTypes, ElementsAttr &caseOperandOffsets) { - SmallVector values; - SmallVector offsets; + SmallVector values; + SmallVector offsets; int32_t value, offset = 0; do { OptionalParseResult integerParseResult = parser.parseOptionalInteger(value); @@ -243,7 +243,7 @@ parseSwitchOpCases(OpAsmParser &parser, ElementsAttr &caseValues, values.push_back(value); Block *destination; - SmallVector operands; + SmallVector operands; if (parser.parseColon() || parser.parseSuccessor(destination)) return failure(); if (!parser.parseOptionalLParen()) { From 704981b43736b2b9788cff0cf493d8b77ce380f5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Dec 2020 14:59:45 -0800 Subject: [PATCH 052/378] [RISCV] Update vmv.v.v-rv32.ll and vmv.v.v-rv64.ll to test the correct intrinsics. These were accidentally identical to the vmv.v.x tests. I must have fumbled when I copied them from our downstream repo. 
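For reference, a minimal sketch of the distinction the corrected tests exercise (the function and value names here are placeholders; the intrinsic declarations mirror the ones added in the diff below, and on RV64 the trailing vl operand is i64 rather than i32):

declare <vscale x 1 x i8> @llvm.riscv.vmv.v.v.nxv1i8(<vscale x 1 x i8>, i32)
declare <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8.i8(i8, i32)

define <vscale x 1 x i8> @sketch(<vscale x 1 x i8> %src, i8 %val, i32 %vl) {
  ; vmv.v.v copies a whole vector operand of the result type.
  %v = call <vscale x 1 x i8> @llvm.riscv.vmv.v.v.nxv1i8(<vscale x 1 x i8> %src, i32 %vl)
  ; vmv.v.x splats a scalar of the element type into every element.
  %s = call <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8.i8(i8 %val, i32 %vl)
  ret <vscale x 1 x i8> %s
}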
--- llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll | 662 ++++++++++-------- llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll | 706 ++++++++++---------- 2 files changed, 716 insertions(+), 652 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll index d22ac605a20b6..2fff963c5d195 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll @@ -1,505 +1,593 @@ -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s -declare @llvm.riscv.vmv.v.x.nxv1i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv1i8( + , i32); -define @intrinsic_vmv.v.x_x_nxv1i8_i8(i8 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i8_nxv1i8( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,mf8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv1i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, a0, e8,mf8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i8( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv2i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv2i8( + , i32); -define @intrinsic_vmv.v.x_x_nxv2i8_i8(i8 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i8_nxv2i8( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,mf4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv2i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, a0, e8,mf4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i8( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv4i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv4i8( + , i32); -define @intrinsic_vmv.v.x_x_nxv4i8_i8(i8 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i8_nxv4i8( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,mf2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv4i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, a0, e8,mf2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i8( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv8i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv8i8( + , i32); -define @intrinsic_vmv.v.x_x_nxv8i8_i8(i8 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv8i8_nxv8i8( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m1 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv8i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, a0, e8,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8i8( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv16i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv16i8( + , i32); -define @intrinsic_vmv.v.x_x_nxv16i8_i8(i8 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv16i8_nxv16i8( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv16i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv16i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, a0, e8,m2 +; 
CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16i8( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv32i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv32i8( + , i32); -define @intrinsic_vmv.v.x_x_nxv32i8_i8(i8 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv32i8_nxv32i8( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv32i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv32i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, a0, e8,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv32i8( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv64i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv64i8( + , i32); -define @intrinsic_vmv.v.x_x_nxv64i8_i8(i8 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv64i8_nxv64i8( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv64i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv64i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, a0, e8,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv64i8( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv1i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv1i16( + , i32); -define @intrinsic_vmv.v.x_x_nxv1i16_i16(i16 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i16_nxv1i16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,mf4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv1i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, a0, e16,mf4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i16( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv2i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv2i16( + , i32); -define @intrinsic_vmv.v.x_x_nxv2i16_i16(i16 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i16_nxv2i16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,mf2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv2i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, a0, e16,mf2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i16( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv4i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv4i16( + , i32); -define @intrinsic_vmv.v.x_x_nxv4i16_i16(i16 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i16_nxv4i16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m1 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv4i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, a0, e16,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i16( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv8i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv8i16( + , i32); -define @intrinsic_vmv.v.x_x_nxv8i16_i16(i16 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv8i16_nxv8i16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv8i16.i16( - 
i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, a0, e16,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8i16( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv16i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv16i16( + , i32); -define @intrinsic_vmv.v.x_x_nxv16i16_i16(i16 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv16i16_nxv16i16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv16i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv16i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, a0, e16,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16i16( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv32i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv32i16( + , i32); -define @intrinsic_vmv.v.x_x_nxv32i16_i16(i16 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv32i16_nxv32i16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv32i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv32i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, a0, e16,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv32i16( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv1i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv1i32( + , i32); -define @intrinsic_vmv.v.x_x_nxv1i32_i32(i32 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i32_nxv1i32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,mf2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv1i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, a0, e32,mf2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i32( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv2i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv2i32( + , i32); -define @intrinsic_vmv.v.x_x_nxv2i32_i32(i32 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i32_nxv2i32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m1 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv2i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, a0, e32,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i32( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv4i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv4i32( + , i32); -define @intrinsic_vmv.v.x_x_nxv4i32_i32(i32 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i32_nxv4i32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv4i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, a0, e32,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i32( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv8i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv8i32( + , i32); -define @intrinsic_vmv.v.x_x_nxv8i32_i32(i32 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv8i32_nxv8i32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: 
intrinsic_vmv.v.x_x_nxv8i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv8i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, a0, e32,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8i32( + %0, i32 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv16i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv16i32( + , i32); -define @intrinsic_vmv.v.x_x_nxv16i32_i32(i32 %0, i32 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv16i32_nxv16i32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv16i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv16i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, a0, e32,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16i32( + %0, i32 %1) ret %a } -define @intrinsic_vmv.v.x_i_nxv1i8_i8(i32 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv1i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,mf8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv1i8.i8( - i8 9, - i32 %0) - - ret %a -} +declare @llvm.riscv.vmv.v.v.nxv1i64( + , + i32); -define @intrinsic_vmv.v.x_i_nxv2i8_i8(i32 %0) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i64_nxv1i64( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv2i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,mf4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv2i8.i8( - i8 9, - i32 %0) +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, a0, e64,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i64( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv4i8_i8(i32 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv4i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,mf2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv4i8.i8( - i8 9, - i32 %0) - - ret %a -} +declare @llvm.riscv.vmv.v.v.nxv2i64( + , + i32); -define @intrinsic_vmv.v.x_i_nxv8i8_i8(i32 %0) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i64_nxv2i64( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv8i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m1 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv8i8.i8( - i8 9, - i32 %0) +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, a0, e64,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i64( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv16i8_i8(i32 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv16i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv16i8.i8( - i8 9, - i32 %0) - - ret %a -} +declare @llvm.riscv.vmv.v.v.nxv4i64( + , + i32); -define @intrinsic_vmv.v.x_i_nxv32i8_i8(i32 %0) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i64_nxv4i64( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv32i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv32i8.i8( - i8 9, - i32 %0) +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, a0, e64,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i64( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv64i8_i8(i32 %0) nounwind { +declare 
@llvm.riscv.vmv.v.v.nxv8i64( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv8i64_nxv8i64( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv64i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv64i8.i8( - i8 9, - i32 %0) +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, a0, e64,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8i64( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv1i16_i16(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv1f16( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv1f16_nxv1f16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv1i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1f16_nxv1f16 ; CHECK: vsetvli {{.*}}, a0, e16,mf4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv1i16.i16( - i16 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1f16( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv2i16_i16(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv2f16( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv2f16_nxv2f16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv2i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2f16_nxv2f16 ; CHECK: vsetvli {{.*}}, a0, e16,mf2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv2i16.i16( - i16 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2f16( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv4i16_i16(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv4f16( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv4f16_nxv4f16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv4i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4f16_nxv4f16 ; CHECK: vsetvli {{.*}}, a0, e16,m1 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv4i16.i16( - i16 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4f16( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv8i16_i16(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv8f16( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv8f16_nxv8f16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv8i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8f16_nxv8f16 ; CHECK: vsetvli {{.*}}, a0, e16,m2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv8i16.i16( - i16 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8f16( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv16i16_i16(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv16f16( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv16f16_nxv16f16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv16i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16f16_nxv16f16 ; CHECK: vsetvli {{.*}}, a0, e16,m4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv16i16.i16( - i16 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16f16( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv32i16_i16(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv32f16( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv32f16_nxv32f16( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv32i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32f16_nxv32f16 ; CHECK: vsetvli {{.*}}, a0, e16,m8 
-; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv32i16.i16( - i16 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv32f16( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv1i32_i32(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv1f32( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv1f32_nxv1f32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv1i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1f32_nxv1f32 ; CHECK: vsetvli {{.*}}, a0, e32,mf2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv1i32.i32( - i32 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1f32( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv2i32_i32(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv2f32( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv2f32_nxv2f32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv2i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2f32_nxv2f32 ; CHECK: vsetvli {{.*}}, a0, e32,m1 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv2i32.i32( - i32 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2f32( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv4i32_i32(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv4f32( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv4f32_nxv4f32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv4i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4f32_nxv4f32 ; CHECK: vsetvli {{.*}}, a0, e32,m2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv4i32.i32( - i32 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4f32( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv8i32_i32(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv8f32( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv8f32_nxv8f32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv8i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8f32_nxv8f32 ; CHECK: vsetvli {{.*}}, a0, e32,m4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv8i32.i32( - i32 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8f32( + %0, + i32 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv16i32_i32(i32 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv16f32( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv16f32_nxv16f32( %0, i32 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv16i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16f32_nxv16f32 ; CHECK: vsetvli {{.*}}, a0, e32,m8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv16i32.i32( - i32 9, - i32 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16f32( + %0, + i32 %1) - ret %a + ret %a +} + +declare @llvm.riscv.vmv.v.v.nxv1f64( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv1f64_nxv1f64( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, a0, e64,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1f64( + %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vmv.v.v.nxv2f64( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv2f64_nxv2f64( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, a0, e64,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call 
@llvm.riscv.vmv.v.v.nxv2f64( + %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vmv.v.v.nxv4f64( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv4f64_nxv4f64( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, a0, e64,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4f64( + %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vmv.v.v.nxv8f64( + , + i32); + +define @intrinsic_vmv.v.v_v_nxv8f64_nxv8f64( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, a0, e64,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8f64( + %0, + i32 %1) + + ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll index 21b22f6c3f0fc..2b0a414c64c74 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll @@ -1,617 +1,593 @@ -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s -declare @llvm.riscv.vmv.v.x.nxv1i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv1i8( + , i64); -define @intrinsic_vmv.v.x_x_nxv1i8_i8(i8 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i8_nxv1i8( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,mf8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv1i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, a0, e8,mf8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i8( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv2i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv2i8( + , i64); -define @intrinsic_vmv.v.x_x_nxv2i8_i8(i8 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i8_nxv2i8( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,mf4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv2i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, a0, e8,mf4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i8( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv4i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv4i8( + , i64); -define @intrinsic_vmv.v.x_x_nxv4i8_i8(i8 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i8_nxv4i8( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,mf2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv4i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, a0, e8,mf2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i8( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv8i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv8i8( + , i64); -define @intrinsic_vmv.v.x_x_nxv8i8_i8(i8 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv8i8_nxv8i8( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m1 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv8i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, a0, e8,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call 
@llvm.riscv.vmv.v.v.nxv8i8( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv16i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv16i8( + , i64); -define @intrinsic_vmv.v.x_x_nxv16i8_i8(i8 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv16i8_nxv16i8( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv16i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv16i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, a0, e8,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16i8( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv32i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv32i8( + , i64); -define @intrinsic_vmv.v.x_x_nxv32i8_i8(i8 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv32i8_nxv32i8( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv32i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv32i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, a0, e8,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv32i8( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv64i8.i8( - i8, +declare @llvm.riscv.vmv.v.v.nxv64i8( + , i64); -define @intrinsic_vmv.v.x_x_nxv64i8_i8(i8 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv64i8_nxv64i8( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv64i8_i8 -; CHECK: vsetvli {{.*}}, a1, e8,m8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv64i8.i8( - i8 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, a0, e8,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv64i8( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv1i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv1i16( + , i64); -define @intrinsic_vmv.v.x_x_nxv1i16_i16(i16 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i16_nxv1i16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,mf4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv1i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, a0, e16,mf4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i16( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv2i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv2i16( + , i64); -define @intrinsic_vmv.v.x_x_nxv2i16_i16(i16 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i16_nxv2i16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,mf2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv2i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, a0, e16,mf2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i16( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv4i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv4i16( + , i64); -define @intrinsic_vmv.v.x_x_nxv4i16_i16(i16 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i16_nxv4i16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m1 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv4i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i16_nxv4i16 +; 
CHECK: vsetvli {{.*}}, a0, e16,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i16( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv8i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv8i16( + , i64); -define @intrinsic_vmv.v.x_x_nxv8i16_i16(i16 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv8i16_nxv8i16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv8i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, a0, e16,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8i16( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv16i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv16i16( + , i64); -define @intrinsic_vmv.v.x_x_nxv16i16_i16(i16 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv16i16_nxv16i16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv16i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv16i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, a0, e16,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16i16( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv32i16.i16( - i16, +declare @llvm.riscv.vmv.v.v.nxv32i16( + , i64); -define @intrinsic_vmv.v.x_x_nxv32i16_i16(i16 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv32i16_nxv32i16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv32i16_i16 -; CHECK: vsetvli {{.*}}, a1, e16,m8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv32i16.i16( - i16 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, a0, e16,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv32i16( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv1i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv1i32( + , i64); -define @intrinsic_vmv.v.x_x_nxv1i32_i32(i32 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i32_nxv1i32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,mf2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv1i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, a0, e32,mf2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i32( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv2i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv2i32( + , i64); -define @intrinsic_vmv.v.x_x_nxv2i32_i32(i32 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i32_nxv2i32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m1 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv2i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, a0, e32,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i32( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv4i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv4i32( + , i64); -define @intrinsic_vmv.v.x_x_nxv4i32_i32(i32 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i32_nxv4i32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m2 -; CHECK: 
vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv4i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, a0, e32,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i32( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv8i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv8i32( + , i64); -define @intrinsic_vmv.v.x_x_nxv8i32_i32(i32 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv8i32_nxv8i32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv8i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, a0, e32,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8i32( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv16i32.i32( - i32, +declare @llvm.riscv.vmv.v.v.nxv16i32( + , i64); -define @intrinsic_vmv.v.x_x_nxv16i32_i32(i32 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv16i32_nxv16i32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv16i32_i32 -; CHECK: vsetvli {{.*}}, a1, e32,m8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv16i32.i32( - i32 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, a0, e32,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16i32( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv1i64.i64( - i64, +declare @llvm.riscv.vmv.v.v.nxv1i64( + , i64); -define @intrinsic_vmv.v.x_x_nxv1i64_i64(i64 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv1i64_nxv1i64( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i64_i64 -; CHECK: vsetvli {{.*}}, a1, e64,m1 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv1i64.i64( - i64 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, a0, e64,m1 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1i64( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv2i64.i64( - i64, +declare @llvm.riscv.vmv.v.v.nxv2i64( + , i64); -define @intrinsic_vmv.v.x_x_nxv2i64_i64(i64 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv2i64_nxv2i64( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i64_i64 -; CHECK: vsetvli {{.*}}, a1, e64,m2 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv2i64.i64( - i64 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, a0, e64,m2 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2i64( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv4i64.i64( - i64, +declare @llvm.riscv.vmv.v.v.nxv4i64( + , i64); -define @intrinsic_vmv.v.x_x_nxv4i64_i64(i64 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv4i64_nxv4i64( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i64_i64 -; CHECK: vsetvli {{.*}}, a1, e64,m4 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv4i64.i64( - i64 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, a0, e64,m4 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4i64( + %0, i64 %1) ret %a } -declare @llvm.riscv.vmv.v.x.nxv8i64.i64( - i64, +declare @llvm.riscv.vmv.v.v.nxv8i64( + , i64); -define @intrinsic_vmv.v.x_x_nxv8i64_i64(i64 %0, i64 %1) nounwind { +define @intrinsic_vmv.v.v_v_nxv8i64_nxv8i64( %0, 
i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i64_i64 -; CHECK: vsetvli {{.*}}, a1, e64,m8 -; CHECK: vmv.v.x {{v[0-9]+}}, a0 - %a = call @llvm.riscv.vmv.v.x.nxv8i64.i64( - i64 %0, +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, a0, e64,m8 +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8i64( + %0, i64 %1) ret %a } -define @intrinsic_vmv.v.x_i_nxv1i8_i8(i64 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv1i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,mf8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv1i8.i8( - i8 9, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.x_i_nxv2i8_i8(i64 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv2i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,mf4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv2i8.i8( - i8 9, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.x_i_nxv4i8_i8(i64 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv4i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,mf2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv4i8.i8( - i8 9, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.x_i_nxv8i8_i8(i64 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv8i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m1 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv8i8.i8( - i8 9, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.x_i_nxv16i8_i8(i64 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv16i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv16i8.i8( - i8 9, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.x_i_nxv32i8_i8(i64 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv32i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv32i8.i8( - i8 9, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.x_i_nxv64i8_i8(i64 %0) nounwind { -entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv64i8_i8 -; CHECK: vsetvli {{.*}}, a0, e8,m8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv64i8.i8( - i8 9, - i64 %0) - - ret %a -} +declare @llvm.riscv.vmv.v.v.nxv1f16( + , + i64); -define @intrinsic_vmv.v.x_i_nxv1i16_i16(i64 %0) nounwind { +define @intrinsic_vmv.v.v_v_nxv1f16_nxv1f16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv1i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1f16_nxv1f16 ; CHECK: vsetvli {{.*}}, a0, e16,mf4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv1i16.i16( - i16 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1f16( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv2i16_i16(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv2f16( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv2f16_nxv2f16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv2i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2f16_nxv2f16 ; CHECK: vsetvli {{.*}}, a0, e16,mf2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv2i16.i16( - i16 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2f16( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv4i16_i16(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv4f16( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv4f16_nxv4f16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv4i16_i16 +; CHECK-LABEL: 
intrinsic_vmv.v.v_v_nxv4f16_nxv4f16 ; CHECK: vsetvli {{.*}}, a0, e16,m1 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv4i16.i16( - i16 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4f16( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv8i16_i16(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv8f16( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv8f16_nxv8f16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv8i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8f16_nxv8f16 ; CHECK: vsetvli {{.*}}, a0, e16,m2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv8i16.i16( - i16 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8f16( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv16i16_i16(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv16f16( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv16f16_nxv16f16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv16i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16f16_nxv16f16 ; CHECK: vsetvli {{.*}}, a0, e16,m4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv16i16.i16( - i16 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16f16( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv32i16_i16(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv32f16( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv32f16_nxv32f16( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv32i16_i16 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32f16_nxv32f16 ; CHECK: vsetvli {{.*}}, a0, e16,m8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv32i16.i16( - i16 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv32f16( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv1i32_i32(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv1f32( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv1f32_nxv1f32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv1i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1f32_nxv1f32 ; CHECK: vsetvli {{.*}}, a0, e32,mf2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv1i32.i32( - i32 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1f32( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv2i32_i32(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv2f32( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv2f32_nxv2f32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv2i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2f32_nxv2f32 ; CHECK: vsetvli {{.*}}, a0, e32,m1 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv2i32.i32( - i32 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2f32( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv4i32_i32(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv4f32( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv4f32_nxv4f32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv4i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4f32_nxv4f32 ; CHECK: vsetvli {{.*}}, a0, e32,m2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv4i32.i32( - i32 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4f32( + %0, + i64 %1) - ret %a + ret %a } 
-define @intrinsic_vmv.v.x_i_nxv8i32_i32(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv8f32( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv8f32_nxv8f32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv8i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8f32_nxv8f32 ; CHECK: vsetvli {{.*}}, a0, e32,m4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv8i32.i32( - i32 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8f32( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv16i32_i32(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv16f32( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv16f32_nxv16f32( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv16i32_i32 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16f32_nxv16f32 ; CHECK: vsetvli {{.*}}, a0, e32,m8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv16i32.i32( - i32 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv16f32( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv1i64_i64(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv1f64( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv1f64_nxv1f64( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv1i64_i64 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1f64_nxv1f64 ; CHECK: vsetvli {{.*}}, a0, e64,m1 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv1i64.i64( - i64 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv1f64( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv2i64_i64(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv2f64( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv2f64_nxv2f64( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv2i64_i64 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2f64_nxv2f64 ; CHECK: vsetvli {{.*}}, a0, e64,m2 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv2i64.i64( - i64 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv2f64( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv4i64_i64(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv4f64( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv4f64_nxv4f64( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv4i64_i64 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4f64_nxv4f64 ; CHECK: vsetvli {{.*}}, a0, e64,m4 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv4i64.i64( - i64 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv4f64( + %0, + i64 %1) - ret %a + ret %a } -define @intrinsic_vmv.v.x_i_nxv8i64_i64(i64 %0) nounwind { +declare @llvm.riscv.vmv.v.v.nxv8f64( + , + i64); + +define @intrinsic_vmv.v.v_v_nxv8f64_nxv8f64( %0, i64 %1) nounwind { entry: -; CHECK-LABEL: intrinsic_vmv.v.x_i_nxv8i64_i64 +; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8f64_nxv8f64 ; CHECK: vsetvli {{.*}}, a0, e64,m8 -; CHECK: vmv.v.i {{v[0-9]+}}, 9 - %a = call @llvm.riscv.vmv.v.x.nxv8i64.i64( - i64 9, - i64 %0) +; CHECK: vmv.v.v {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmv.v.v.nxv8f64( + %0, + i64 %1) - ret %a + ret %a } From e18734f87a6eb8d2fbd2d9c6690b99b057953935 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Dec 2020 15:16:57 -0800 Subject: [PATCH 053/378] [RISCV] Use more precise type constraints for the vmv.v.v and vmv.v.x intrinsics. We can infer the input type from the result type. 
For vmv.v.v its the same. For vmv.v.x its the element type. --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index f65f8e6ab7796..560f16afcc522 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -294,14 +294,6 @@ let TargetPrefix = "riscv" in { let ExtendOperand = 3; } - // For vmv.v.v, vmv.v.x, vmv.v.i - // Input: (vector_in/scalar_in, vl) - class RISCVUnary : Intrinsic<[llvm_anyvector_ty], - [llvm_any_ty, llvm_anyint_ty], - [IntrNoMem] >, RISCVVIntrinsic { - let ExtendOperand = 1; - } - class RISCVTernaryAAAXNoMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, @@ -440,8 +432,14 @@ let TargetPrefix = "riscv" in { defm vssubu : RISCVSaturatingBinaryAAX; defm vssub : RISCVSaturatingBinaryAAX; - def int_riscv_vmv_v_v : RISCVUnary; - def int_riscv_vmv_v_x : RISCVUnary; + def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + def int_riscv_vmv_v_x : Intrinsic<[llvm_anyint_ty], + [LLVMVectorElementType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 1; + } def int_riscv_vmv_x_s : Intrinsic<[LLVMVectorElementType<0>], [llvm_anyint_ty], From d7a6f3a1056a5f5212fa561a909fcfa502126074 Mon Sep 17 00:00:00 2001 From: Ta-Wei Tu Date: Tue, 22 Dec 2020 08:42:52 +0800 Subject: [PATCH 054/378] [LoopNest] Extend `LPMUpdater` and adaptor to handle loop-nest passes This is a follow-up patch of D87045. The patch implements "loop-nest mode" for `LPMUpdater` and `FunctionToLoopPassAdaptor` in which only top-level loops are operated. `createFunctionToLoopPassAdaptor` decides whether the returned adaptor is in loop-nest mode or not based on the given pass. If the pass is a loop-nest pass or the pass is a `LoopPassManager` which contains only loop-nest passes, the loop-nest version of adaptor is returned; otherwise, the normal (loop) version of adaptor is returned. Reviewed By: Whitney Differential Revision: https://reviews.llvm.org/D87531 --- .../llvm/Transforms/Scalar/LoopPassManager.h | 105 +++++++++++++++--- .../lib/Transforms/Scalar/LoopPassManager.cpp | 11 +- .../Transforms/Scalar/LoopPassManagerTest.cpp | 77 ++++++++++--- 3 files changed, 159 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index a0bef89b36cf3..2a342fcda3c2e 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -53,6 +53,16 @@ namespace llvm { // Forward declarations of an update tracking API used in the pass manager. class LPMUpdater; +namespace { + +template +using HasRunOnLoopT = decltype(std::declval().run( + std::declval(), std::declval(), + std::declval(), + std::declval())); + +} // namespace + // Explicit specialization and instantiation declarations for the pass manager. // See the comments on the definition of the specialization for details on how // it differs from the primary template. @@ -62,13 +72,6 @@ class PassManager> { -private: - template - using HasRunOnLoopT = decltype(std::declval().run( - std::declval(), std::declval(), - std::declval(), - std::declval())); - public: /// Construct a pass manager. 
/// @@ -154,6 +157,9 @@ class PassManagercontains(&L)) && "Cannot delete a loop outside of the " @@ -263,6 +278,8 @@ class LPMUpdater { /// loops within them will be visited in postorder as usual for the loop pass /// manager. void addChildLoops(ArrayRef NewChildLoops) { + assert(!LoopNestMode && + "Child loops should not be pushed in loop-nest mode."); // Insert ourselves back into the worklist first, as this loop should be // revisited after all the children have been processed. Worklist.insert(CurrentL); @@ -294,7 +311,10 @@ class LPMUpdater { "All of the new loops must be siblings of the current loop!"); #endif - appendLoopsToWorklist(NewSibLoops, Worklist); + if (LoopNestMode) + Worklist.insert(NewSibLoops); + else + appendLoopsToWorklist(NewSibLoops, Worklist); // No need to skip the current loop or revisit it, as sibling loops // shouldn't impact anything. @@ -324,6 +344,7 @@ class LPMUpdater { Loop *CurrentL; bool SkipCurrentLoop; + const bool LoopNestMode; #ifndef NDEBUG // In debug builds we also track the parent loop to implement asserts even in @@ -332,8 +353,8 @@ class LPMUpdater { #endif LPMUpdater(SmallPriorityWorklist &Worklist, - LoopAnalysisManager &LAM) - : Worklist(Worklist), LAM(LAM) {} + LoopAnalysisManager &LAM, bool LoopNestMode = false) + : Worklist(Worklist), LAM(LAM), LoopNestMode(LoopNestMode) {} }; template @@ -366,6 +387,15 @@ Optional LoopPassManager::runSinglePass( /// FunctionAnalysisManager it will run the \c LoopAnalysisManagerFunctionProxy /// analysis prior to running the loop passes over the function to enable a \c /// LoopAnalysisManager to be used within this run safely. +/// +/// The adaptor comes with two modes: the loop mode and the loop-nest mode, and +/// the worklist updater lived inside will be in the same mode as the adaptor +/// (refer to the documentation of \c LPMUpdater for more detailed explanation). +/// Specifically, in loop mode, all loops in the funciton will be pushed into +/// the worklist and processed by \p Pass, while only top-level loops are +/// processed in loop-nest mode. Please refer to the various specializations of +/// \fn createLoopFunctionToLoopPassAdaptor to see when loop mode and loop-nest +/// mode are used. class FunctionToLoopPassAdaptor : public PassInfoMixin { public: @@ -376,10 +406,12 @@ class FunctionToLoopPassAdaptor explicit FunctionToLoopPassAdaptor(std::unique_ptr Pass, bool UseMemorySSA = false, bool UseBlockFrequencyInfo = false, - bool DebugLogging = false) + bool DebugLogging = false, + bool LoopNestMode = false) : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging), UseMemorySSA(UseMemorySSA), - UseBlockFrequencyInfo(UseBlockFrequencyInfo) { + UseBlockFrequencyInfo(UseBlockFrequencyInfo), + LoopNestMode(LoopNestMode) { LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); LoopCanonicalizationFPM.addPass(LCSSAPass()); } @@ -389,6 +421,8 @@ class FunctionToLoopPassAdaptor static bool isRequired() { return true; } + bool isLoopNestMode() const { return LoopNestMode; } + private: std::unique_ptr Pass; @@ -396,12 +430,16 @@ class FunctionToLoopPassAdaptor bool UseMemorySSA = false; bool UseBlockFrequencyInfo = false; + const bool LoopNestMode; }; /// A function to deduce a loop pass type and wrap it in the templated /// adaptor. +/// +/// If \p Pass is a loop pass, the returned adaptor will be in loop mode. 
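// A minimal usage sketch of the two modes, mirroring the unit test added with
// this patch. MyLoopNestPass is a hypothetical pass with the loop-nest run()
// signature, i.e. run(LoopNest &, LoopAnalysisManager &,
// LoopStandardAnalysisResults &, LPMUpdater &):
//
//   FunctionPassManager FPM;
//
//   // A plain loop pass selects the per-loop adaptor.
//   auto LoopAdaptor = createFunctionToLoopPassAdaptor(LICMPass());
//   assert(!LoopAdaptor.isLoopNestMode());
//   FPM.addPass(std::move(LoopAdaptor));
//
//   // A loop-nest pass is wrapped in a LoopPassManager and the resulting
//   // adaptor only pushes top-level loops onto the worklist.
//   auto NestAdaptor = createFunctionToLoopPassAdaptor(MyLoopNestPass());
//   assert(NestAdaptor.isLoopNestMode());
//   FPM.addPass(std::move(NestAdaptor));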
template -FunctionToLoopPassAdaptor +inline std::enable_if_t::value, + FunctionToLoopPassAdaptor> createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, bool UseBlockFrequencyInfo = false, bool DebugLogging = false) { @@ -410,7 +448,46 @@ createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, LoopStandardAnalysisResults &, LPMUpdater &>; return FunctionToLoopPassAdaptor( std::make_unique(std::move(Pass)), UseMemorySSA, - UseBlockFrequencyInfo, DebugLogging); + UseBlockFrequencyInfo, DebugLogging, false); +} + +/// If \p Pass is a loop-nest pass, \p Pass will first be wrapped into a +/// \c LoopPassManager and the returned adaptor will be in loop-nest mode. +template +inline std::enable_if_t::value, + FunctionToLoopPassAdaptor> +createFunctionToLoopPassAdaptor(LoopNestPassT Pass, bool UseMemorySSA = false, + bool UseBlockFrequencyInfo = false, + bool DebugLogging = false) { + LoopPassManager LPM(DebugLogging); + LPM.addPass(std::move(Pass)); + using PassModelT = + detail::PassModel; + return FunctionToLoopPassAdaptor(std::make_unique(std::move(LPM)), + UseMemorySSA, UseBlockFrequencyInfo, + DebugLogging, true); +} + +/// If \p Pass is an instance of \c LoopPassManager, the returned adaptor will +/// be in loop-nest mode if the pass manager contains only loop-nest passes. +template <> +inline FunctionToLoopPassAdaptor +createFunctionToLoopPassAdaptor(LoopPassManager LPM, + bool UseMemorySSA, + bool UseBlockFrequencyInfo, + bool DebugLogging) { + // Check if LPM contains any loop pass and if it does not, returns an adaptor + // in loop-nest mode. + using PassModelT = + detail::PassModel; + bool LoopNestMode = (LPM.getNumLoopPasses() == 0); + return FunctionToLoopPassAdaptor(std::make_unique(std::move(LPM)), + UseMemorySSA, UseBlockFrequencyInfo, + DebugLogging, LoopNestMode); } /// Pass for printing a loop's contents as textual IR. diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 5bc41552e1a35..3fe8e72591143 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -222,11 +222,16 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, // Register the worklist and loop analysis manager so that loop passes can // update them when they mutate the loop nest structure. - LPMUpdater Updater(Worklist, LAM); + LPMUpdater Updater(Worklist, LAM, LoopNestMode); // Add the loop nests in the reverse order of LoopInfo. See method // declaration. - appendLoopsToWorklist(LI, Worklist); + if (!LoopNestMode) { + appendLoopsToWorklist(LI, Worklist); + } else { + for (Loop *L : LI) + Worklist.insert(L); + } #ifndef NDEBUG PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) { @@ -247,6 +252,8 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, do { Loop *L = Worklist.pop_back_val(); + assert(!(LoopNestMode && L->getParentLoop()) && + "L should be a top-level loop in loop-nest mode."); // Reset the update structure for this loop. 
Updater.CurrentL = L; diff --git a/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp b/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp index fc41bfa00ead6..a03d43b10ba41 100644 --- a/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp +++ b/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp @@ -1603,28 +1603,69 @@ TEST_F(LoopPassManagerTest, LoopDeletion) { } TEST_F(LoopPassManagerTest, HandleLoopNestPass) { - ::testing::InSequence MakeExpectationsSequenced; + ::testing::Sequence FSequence, GSequence; - EXPECT_CALL(MLPHandle, run(HasName("loop.0.0"), _, _, _)).Times(2); - EXPECT_CALL(MLPHandle, run(HasName("loop.0.1"), _, _, _)).Times(2); - EXPECT_CALL(MLPHandle, run(HasName("loop.0"), _, _, _)); - EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)); - EXPECT_CALL(MLPHandle, run(HasName("loop.0"), _, _, _)); - EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)); - EXPECT_CALL(MLPHandle, run(HasName("loop.g.0"), _, _, _)); - EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)); - EXPECT_CALL(MLPHandle, run(HasName("loop.g.0"), _, _, _)); - EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)); + EXPECT_CALL(MLPHandle, run(HasName("loop.0.0"), _, _, _)) + .Times(2) + .InSequence(FSequence); + EXPECT_CALL(MLPHandle, run(HasName("loop.0.1"), _, _, _)) + .Times(2) + .InSequence(FSequence); + EXPECT_CALL(MLPHandle, run(HasName("loop.0"), _, _, _)).InSequence(FSequence); + EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)) + .InSequence(FSequence); + EXPECT_CALL(MLPHandle, run(HasName("loop.0"), _, _, _)).InSequence(FSequence); + EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)) + .InSequence(FSequence); + EXPECT_CALL(MLPHandle, run(HasName("loop.g.0"), _, _, _)) + .InSequence(GSequence); + EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)) + .InSequence(GSequence); + EXPECT_CALL(MLPHandle, run(HasName("loop.g.0"), _, _, _)) + .InSequence(GSequence); + EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)) + .InSequence(GSequence); - LoopPassManager LPM(true); - LPM.addPass(MLPHandle.getPass()); - LPM.addPass(MLNPHandle.getPass()); - LPM.addPass(MLPHandle.getPass()); - LPM.addPass(MLNPHandle.getPass()); + EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)) + .InSequence(FSequence); + EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)) + .InSequence(GSequence); + + EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)) + .InSequence(FSequence); + EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)) + .InSequence(GSequence); ModulePassManager MPM(true); - MPM.addPass(createModuleToFunctionPassAdaptor( - createFunctionToLoopPassAdaptor(std::move(LPM)))); + FunctionPassManager FPM(true); + + { + LoopPassManager LPM(true); + LPM.addPass(MLPHandle.getPass()); + LPM.addPass(MLNPHandle.getPass()); + LPM.addPass(MLPHandle.getPass()); + LPM.addPass(MLNPHandle.getPass()); + + auto Adaptor = createFunctionToLoopPassAdaptor(std::move(LPM)); + ASSERT_FALSE(Adaptor.isLoopNestMode()); + FPM.addPass(std::move(Adaptor)); + } + + { + auto Adaptor = createFunctionToLoopPassAdaptor(MLNPHandle.getPass()); + ASSERT_TRUE(Adaptor.isLoopNestMode()); + FPM.addPass(std::move(Adaptor)); + } + + { + LoopPassManager LPM(true); + LPM.addPass(MLNPHandle.getPass()); + auto Adaptor = createFunctionToLoopPassAdaptor(MLNPHandle.getPass()); + ASSERT_TRUE(Adaptor.isLoopNestMode()); + FPM.addPass(std::move(Adaptor)); + } + + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); MPM.run(*M, MAM); } From 
8c85aae6c5b282eee1f58e67334e809016f04776 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 21 Dec 2020 17:18:28 -0800 Subject: [PATCH 055/378] [MC][test] Reorganize .cfi_* tests Delete tests which are covered by others. --- llvm/test/MC/ELF/{fde.s => cfi-fde-encoding.s} | 0 .../{X86/return-column.s => ELF/cfi-return-column.s} | 0 llvm/test/MC/{X86 => ELF}/cfi-scope-errors.s | 0 .../cfi-scope-errors2.s} | 7 +------ llvm/test/MC/{X86 => ELF}/expand-var.s | 0 llvm/test/MC/X86/{pr38826.s => cfi_offset-eip.s} | 0 ...i_def_cfa-crash.s => compact-unwind-cfi_def_cfa.s} | 0 llvm/test/MC/X86/fde-reloc.s | 11 ----------- 8 files changed, 1 insertion(+), 17 deletions(-) rename llvm/test/MC/ELF/{fde.s => cfi-fde-encoding.s} (100%) rename llvm/test/MC/{X86/return-column.s => ELF/cfi-return-column.s} (100%) rename llvm/test/MC/{X86 => ELF}/cfi-scope-errors.s (100%) rename llvm/test/MC/{X86/cfi-open-within-another-crash.s => ELF/cfi-scope-errors2.s} (87%) rename llvm/test/MC/{X86 => ELF}/expand-var.s (100%) rename llvm/test/MC/X86/{pr38826.s => cfi_offset-eip.s} (100%) rename llvm/test/MC/X86/{cfi_def_cfa-crash.s => compact-unwind-cfi_def_cfa.s} (100%) delete mode 100644 llvm/test/MC/X86/fde-reloc.s diff --git a/llvm/test/MC/ELF/fde.s b/llvm/test/MC/ELF/cfi-fde-encoding.s similarity index 100% rename from llvm/test/MC/ELF/fde.s rename to llvm/test/MC/ELF/cfi-fde-encoding.s diff --git a/llvm/test/MC/X86/return-column.s b/llvm/test/MC/ELF/cfi-return-column.s similarity index 100% rename from llvm/test/MC/X86/return-column.s rename to llvm/test/MC/ELF/cfi-return-column.s diff --git a/llvm/test/MC/X86/cfi-scope-errors.s b/llvm/test/MC/ELF/cfi-scope-errors.s similarity index 100% rename from llvm/test/MC/X86/cfi-scope-errors.s rename to llvm/test/MC/ELF/cfi-scope-errors.s diff --git a/llvm/test/MC/X86/cfi-open-within-another-crash.s b/llvm/test/MC/ELF/cfi-scope-errors2.s similarity index 87% rename from llvm/test/MC/X86/cfi-open-within-another-crash.s rename to llvm/test/MC/ELF/cfi-scope-errors2.s index 6ec5338f54b47..d29c05636d0df 100644 --- a/llvm/test/MC/X86/cfi-open-within-another-crash.s +++ b/llvm/test/MC/ELF/cfi-scope-errors2.s @@ -4,15 +4,10 @@ # RUN: not llvm-mc %s -filetype=obj -triple=x86_64-unknown-linux -o /dev/null 2>&1 | FileCheck %s .text -.globl proc_one -proc_one: .cfi_startproc - + .text -.globl proc_two -proc_two: .cfi_startproc # CHECK: [[#@LINE]]:1: error: starting new .cfi frame before finishing the previous one .cfi_endproc - diff --git a/llvm/test/MC/X86/expand-var.s b/llvm/test/MC/ELF/expand-var.s similarity index 100% rename from llvm/test/MC/X86/expand-var.s rename to llvm/test/MC/ELF/expand-var.s diff --git a/llvm/test/MC/X86/pr38826.s b/llvm/test/MC/X86/cfi_offset-eip.s similarity index 100% rename from llvm/test/MC/X86/pr38826.s rename to llvm/test/MC/X86/cfi_offset-eip.s diff --git a/llvm/test/MC/X86/cfi_def_cfa-crash.s b/llvm/test/MC/X86/compact-unwind-cfi_def_cfa.s similarity index 100% rename from llvm/test/MC/X86/cfi_def_cfa-crash.s rename to llvm/test/MC/X86/compact-unwind-cfi_def_cfa.s diff --git a/llvm/test/MC/X86/fde-reloc.s b/llvm/test/MC/X86/fde-reloc.s deleted file mode 100644 index 63ac976621884..0000000000000 --- a/llvm/test/MC/X86/fde-reloc.s +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-objdump -r - | FileCheck --check-prefix=X86-64 %s -// RUN: llvm-mc -filetype=obj %s -o - -triple i686-pc-linux | llvm-objdump -r - | FileCheck --check-prefix=I686 %s - -// PR15448 - -func: - .cfi_startproc - .cfi_endproc - 
-// X86-64: R_X86_64_PC32 -// I686: R_386_PC32 From 6e2af4d6046995abf1003ebacfce95415010d574 Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Tue, 22 Dec 2020 02:19:31 +0100 Subject: [PATCH 056/378] Revert "[mlir] Add SmallVector sizes" This reverts commit 83274a0773f6a20abdc848b448009e0195c42166. Fixed in a555ca8b3d67 --- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 09c662c74477d..7b1300da1783f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -198,8 +198,8 @@ void SwitchOp::build(OpBuilder &builder, OperationState &result, Value value, ArrayRef caseValues, BlockRange caseDestinations, ArrayRef caseOperands, ArrayRef branchWeights) { - SmallVector flattenedCaseOperands; - SmallVector caseOperandOffsets; + SmallVector flattenedCaseOperands; + SmallVector caseOperandOffsets; int32_t offset = 0; for (ValueRange operands : caseOperands) { flattenedCaseOperands.append(operands.begin(), operands.end()); @@ -230,8 +230,8 @@ parseSwitchOpCases(OpAsmParser &parser, ElementsAttr &caseValues, SmallVectorImpl &caseOperands, SmallVectorImpl &caseOperandTypes, ElementsAttr &caseOperandOffsets) { - SmallVector values; - SmallVector offsets; + SmallVector values; + SmallVector offsets; int32_t value, offset = 0; do { OptionalParseResult integerParseResult = parser.parseOptionalInteger(value); @@ -243,7 +243,7 @@ parseSwitchOpCases(OpAsmParser &parser, ElementsAttr &caseValues, values.push_back(value); Block *destination; - SmallVector operands; + SmallVector operands; if (parser.parseColon() || parser.parseSuccessor(destination)) return failure(); if (!parser.parseOptionalLParen()) { From b15ba2cf6fde9b7e8599dc9c5afc412a98aba5be Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Sun, 20 Dec 2020 22:41:47 -0800 Subject: [PATCH 057/378] [RISCV] Add intrinsics for vmacc/vnmsac/vmadd/vnmsub instructions This defines vmadd, vmacc, vnmsub, and vnmsac intrinsics and lower to V instructions. 
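For reference, the element-wise semantics of the four operations per the RVV
specification (each overwrites vd; the .vx forms take a scalar rs1 in place of
vs1, which is the operand the new intrinsic classes mark with ExtendOperand = 2):

; vmacc.vv  vd, vs1, vs2   ; vd[i] =  (vs1[i] * vs2[i]) + vd[i]
; vnmsac.vv vd, vs1, vs2   ; vd[i] = -(vs1[i] * vs2[i]) + vd[i]
; vmadd.vv  vd, vs1, vs2   ; vd[i] =  (vs1[i] * vd[i])  + vs2[i]
; vnmsub.vv vd, vs1, vs2   ; vd[i] = -(vs1[i] * vd[i])  + vs2[i]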
Authored-by: Roger Ferrer Ibanez Co-Authored-by: ShihPo Hung Differential Revision: https://reviews.llvm.org/D93632 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 23 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 55 + llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll | 1261 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll | 1513 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll | 1261 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll | 1513 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll | 1261 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll | 1513 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll | 1261 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll | 1513 +++++++++++++++++ 10 files changed, 11174 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 560f16afcc522..dc1d56322191a 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -304,6 +304,20 @@ let TargetPrefix = "riscv" in { [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic; + class RISCVTernaryAAXANoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + class RISCVTernaryAAXAMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } multiclass RISCVUSLoad { def "int_riscv_" # NAME : RISCVUSLoad; @@ -358,6 +372,10 @@ let TargetPrefix = "riscv" in { def "int_riscv_" # NAME : RISCVTernaryAAAXNoMask; def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAAXMask; } + multiclass RISCVTernaryAAXA { + def "int_riscv_" # NAME : RISCVTernaryAAXANoMask; + def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMask; + } defm vle : RISCVUSLoad; defm vse : RISCVUSStore; @@ -418,6 +436,11 @@ let TargetPrefix = "riscv" in { defm vwmulu : RISCVBinaryABX; defm vwmulsu : RISCVBinaryABX; + defm vmacc : RISCVTernaryAAXA; + defm vnmsac : RISCVTernaryAAXA; + defm vmadd : RISCVTernaryAAXA; + defm vnmsub : RISCVTernaryAAXA; + defm vfadd : RISCVBinaryAAX; defm vfsub : RISCVBinaryAAX; defm vfrsub : RISCVBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 0a2aad3bc2bd2..a5c5c04542e14 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -854,16 +854,31 @@ multiclass VPseudoTernary { + foreach m = MxList.m in + defm _VV : VPseudoTernary; +} + multiclass VPseudoTernaryV_VX { foreach m = MxList.m in defm _VX : VPseudoTernary; } +multiclass VPseudoTernaryV_VX_AAXA { + foreach m = MxList.m in + defm _VX : VPseudoTernary; +} + multiclass VPseudoTernaryV_VI { foreach m = MxList.m in defm _VI : VPseudoTernary; } 
+multiclass VPseudoTernaryV_VV_VX_AAXA { + defm "" : VPseudoTernaryV_VV; + defm "" : VPseudoTernaryV_VX_AAXA; +} + multiclass VPseudoTernaryV_VX_VI { defm "" : VPseudoTernaryV_VX; defm "" : VPseudoTernaryV_VI; @@ -1475,6 +1490,15 @@ multiclass VPatTernary; } +multiclass VPatTernaryV_VV vtilist> { + foreach vti = vtilist in + defm : VPatTernary; +} + multiclass VPatTernaryV_VX vtilist> { foreach vti = vtilist in @@ -1484,6 +1508,15 @@ multiclass VPatTernaryV_VX; } +multiclass VPatTernaryV_VX_AAXA vtilist> { + foreach vti = vtilist in + defm : VPatTernary; +} + multiclass VPatTernaryV_VI vtilist, Operand Imm_type> { foreach vti = vtilist in @@ -1493,6 +1526,12 @@ multiclass VPatTernaryV_VI; } +multiclass VPatTernaryV_VV_VX_AAXA vtilist> { + defm "" : VPatTernaryV_VV; + defm "" : VPatTernaryV_VX_AAXA; +} + multiclass VPatTernaryV_VX_VI vtilist, Operand Imm_type = simm5> { defm "" : VPatTernaryV_VX; @@ -1649,6 +1688,14 @@ defm PseudoVWMUL : VPseudoBinaryW_VV_VX; defm PseudoVWMULU : VPseudoBinaryW_VV_VX; defm PseudoVWMULSU : VPseudoBinaryW_VV_VX; +//===----------------------------------------------------------------------===// +// 12.13. Vector Single-Width Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMACC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVNMSAC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVMADD : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVNMSUB : VPseudoTernaryV_VV_VX_AAXA; + //===----------------------------------------------------------------------===// // 12.17. Vector Integer Move Instructions //===----------------------------------------------------------------------===// @@ -1975,6 +2022,14 @@ defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmul", "PseudoVWMUL", AllWidenableIntVec defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmulu", "PseudoVWMULU", AllWidenableIntVectors>; defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmulsu", "PseudoVWMULSU", AllWidenableIntVectors>; +//===----------------------------------------------------------------------===// +// 12.13. Vector Single-Width Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vmadd", "PseudoVMADD", AllIntegerVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsub", "PseudoVNMSUB", AllIntegerVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vmacc", "PseudoVMACC", AllIntegerVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsac", "PseudoVNMSAC", AllIntegerVectors>; + //===----------------------------------------------------------------------===// // 12.17. 
Vector Integer Move Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll new file mode 100644 index 0000000000000..24a2a82f6ff05 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll @@ -0,0 +1,1261 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmacc.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i8.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i8.nxv2i8( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i8.nxv2i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i8.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i8.nxv4i8( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i8.nxv4i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i8.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i8.nxv8i8( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i8.nxv8i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i8.nxv8i8( + , + , + , + , + i32); + +define 
@intrinsic_vmacc_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i8.nxv16i8( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i8.nxv16i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i8.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv32i8.nxv32i8( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv32i8.nxv32i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv32i8.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i16.nxv1i16( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i16.nxv1i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i16.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i16.nxv2i16( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i16.nxv2i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i16.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, 
i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i16.nxv4i16( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i16.nxv4i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i16.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i16.nxv8i16( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i16.nxv8i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i16.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i16.nxv16i16( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i16.nxv16i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i16.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i32.nxv1i32( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i32.nxv1i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i32.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i32.nxv2i32( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i32.nxv2i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i32.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i32.nxv4i32( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i32.nxv4i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i32.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i32.nxv8i32( + , + , + , + i32); + +define @intrinsic_vmacc_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i32.nxv8i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i32.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmacc_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmacc_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e8,mf8,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmacc_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmacc_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmacc_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmacc_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv32i8.i8( + , + i8, + , + i32); + +define 
@intrinsic_vmacc_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv32i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv32i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmacc_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmacc_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmacc_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmacc_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e16,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmacc_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmacc_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmacc_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmacc_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + 
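(With the elided scalable types spelled out, the unmasked nxv4i32 vx case just
above reads roughly as follows, assuming <vscale x 4 x i32> for the nxv4i32
mangling; the first vector operand is the accumulator that vmacc.vx overwrites,
and the operand order (vd, rs1, vs2, vl) matches vmacc.vx vd, rs1, vs2:)

declare <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32(
  <vscale x 4 x i32>,
  i32,
  <vscale x 4 x i32>,
  i32);

define <vscale x 4 x i32> @intrinsic_vmacc_vx_nxv4i32_i32_nxv4i32(<vscale x 4 x i32> %0, i32 %1, <vscale x 4 x i32> %2, i32 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i32_i32_nxv4i32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu
; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}
  %a = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32(
    <vscale x 4 x i32> %0,
    i32 %1,
    <vscale x 4 x i32> %2,
    i32 %3)

  ret <vscale x 4 x i32> %a
}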
+declare @llvm.riscv.vmacc.mask.nxv4i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmacc_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmacc_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll new file mode 100644 index 0000000000000..9e7d36368d34f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll @@ -0,0 +1,1513 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmacc.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i8.nxv2i8( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i8.nxv2i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i8.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i8.nxv4i8( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmacc_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i8.nxv4i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i8.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i8.nxv8i8( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i8.nxv8i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i8.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i8.nxv16i8( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i8.nxv16i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i8.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv32i8.nxv32i8( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv32i8.nxv32i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv32i8.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i16.nxv1i16( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; 
CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i16.nxv1i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i16.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i16.nxv2i16( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i16.nxv2i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i16.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i16.nxv4i16( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i16.nxv4i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i16.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i16.nxv8i16( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i16.nxv8i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i16.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i16.nxv16i16( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, 
{{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i16.nxv16i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i16.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i32.nxv1i32( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i32.nxv1i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i32.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i32.nxv2i32( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i32.nxv2i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i32.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i32.nxv4i32( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i32.nxv4i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i32.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i32.nxv8i32( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call 
@llvm.riscv.vmacc.nxv8i32.nxv8i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i32.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i64.nxv1i64( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i64.nxv1i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i64.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i64.nxv2i64( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i64.nxv2i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i64.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i64.nxv4i64( + , + , + , + i64); + +define @intrinsic_vmacc_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i64.nxv4i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i64.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmacc_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmacc.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmacc_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + 
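The rv64 file repeats the rv32 patterns with an i64 vector-length operand. For reference, the unmasked vx case above is spelled out below with the scalable-vector operand types implied by the intrinsic's name mangling (nxv1i8 corresponds to <vscale x 1 x i8>); the types are inferred from that mangling and the layout is approximate, so read it as a sketch of the pattern rather than the verbatim file contents:

declare <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8(
  <vscale x 1 x i8>,
  i8,
  <vscale x 1 x i8>,
  i64);

define <vscale x 1 x i8> @intrinsic_vmacc_vx_nxv1i8_i8_nxv1i8(<vscale x 1 x i8> %0, i8 %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i8_i8_nxv1i8
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}
  ; %0 is the accumulator operand that vmacc overwrites; %3 is the requested vector length.
  %a = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8(
    <vscale x 1 x i8> %0,
    i8 %1,
    <vscale x 1 x i8> %2,
    i64 %3)
  ret <vscale x 1 x i8> %a
}

The masked variant that follows takes an additional <vscale x 1 x i1> mask operand ahead of the i64 vl and expects the v0.t suffix on the generated vmacc.vx.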
+declare @llvm.riscv.vmacc.mask.nxv1i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmacc_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmacc_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmacc_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmacc_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e8,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv32i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmacc_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv32i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv32i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmacc_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmacc_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmacc_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vmacc.nxv8i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmacc_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv16i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmacc_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv16i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv16i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmacc_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmacc_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmacc_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmacc_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv4i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv8i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmacc_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv8i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv8i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv1i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv1i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv1i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv1i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv2i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmacc.nxv2i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv2i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv2i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmacc.nxv4i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call 
@llvm.riscv.vmacc.nxv4i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.mask.nxv4i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmacc.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmacc.mask.nxv4i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll new file mode 100644 index 0000000000000..92744c6e7df40 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll @@ -0,0 +1,1261 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmadd.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i8.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i8.nxv2i8( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i8.nxv2i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i8.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i8.nxv4i8( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i8.nxv4i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i8.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i8.nxv8i8( + , + , + , + i32); + +define 
@intrinsic_vmadd_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i8.nxv8i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i8.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i8.nxv16i8( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i8.nxv16i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i8.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv32i8.nxv32i8( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv32i8.nxv32i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv32i8.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i16.nxv1i16( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i16.nxv1i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i16.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i16.nxv2i16( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i16.nxv2i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i16.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i16.nxv4i16( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i16.nxv4i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i16.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i16.nxv8i16( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i16.nxv8i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i16.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i16.nxv16i16( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i16.nxv16i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i16.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i32.nxv1i32( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmadd_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i32.nxv1i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i32.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i32.nxv2i32( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i32.nxv2i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i32.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i32.nxv4i32( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i32.nxv4i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i32.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i32.nxv8i32( + , + , + , + i32); + +define @intrinsic_vmadd_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i32.nxv8i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i32.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmadd_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmadd_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmadd_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmadd_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmadd_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmadd_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i8.i8( + , + i8, + , + , + i32); + +define 
@intrinsic_vmadd_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv32i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmadd_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv32i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv32i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmadd_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmadd_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmadd_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmadd_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmadd_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmadd_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmadd_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, 
+ i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmadd_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmadd_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vmadd_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll new file mode 100644 index 0000000000000..a6d229dcc7066 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll @@ -0,0 +1,1513 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmadd.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i8.nxv2i8( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i8.nxv2i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i8.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmadd_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i8.nxv4i8( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i8.nxv4i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i8.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i8.nxv8i8( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i8.nxv8i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i8.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i8.nxv16i8( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i8.nxv16i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i8.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv32i8.nxv32i8( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv32i8.nxv32i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv32i8.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: 
vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i16.nxv1i16( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i16.nxv1i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i16.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i16.nxv2i16( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i16.nxv2i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i16.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i16.nxv4i16( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i16.nxv4i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i16.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i16.nxv8i16( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i16.nxv8i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i16.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + 
%a = call @llvm.riscv.vmadd.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i16.nxv16i16( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i16.nxv16i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i16.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i32.nxv1i32( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i32.nxv1i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i32.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i32.nxv2i32( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i32.nxv2i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i32.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i32.nxv4i32( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i32.nxv4i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i32.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vmadd.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i32.nxv8i32( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i32.nxv8i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i32.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i64.nxv1i64( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i64.nxv1i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i64.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i64.nxv2i64( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i64.nxv2i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i64.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i64.nxv4i64( + , + , + , + i64); + +define @intrinsic_vmadd_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i64.nxv4i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i64.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmadd_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmadd.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, 
+ %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmadd_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmadd_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmadd_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmadd_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmadd_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv32i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmadd_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv32i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv32i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmadd_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmadd_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmadd_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare 
@llvm.riscv.vmadd.mask.nxv4i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmadd_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv16i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmadd_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv16i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv16i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmadd_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmadd_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i64 %4) nounwind 
{ +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmadd_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv8i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmadd_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv8i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv8i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv1i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv1i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv1i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv1i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv2i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv2i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv2i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, 
v0.t + %a = call @llvm.riscv.vmadd.mask.nxv2i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmadd.nxv4i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vmadd.nxv4i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmadd.mask.nxv4i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmadd.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vmadd.mask.nxv4i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll new file mode 100644 index 0000000000000..e6997482a8701 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll @@ -0,0 +1,1261 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vnmsac.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i8.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i8.nxv2i8( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i8.nxv2i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i8.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i8.nxv4i8( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i8.nxv4i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i8.nxv4i8( + , + , + , + , + i32); + +define 
@intrinsic_vnmsac_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i8.nxv8i8( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i8.nxv8i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i8.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i8.nxv16i8( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i8.nxv16i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i8.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv32i8.nxv32i8( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv32i8.nxv32i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv32i8.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i16.nxv1i16( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i16.nxv1i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i16.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv1i16_nxv1i16_nxv1i16( 
%0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i16.nxv2i16( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i16.nxv2i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i16.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i16.nxv4i16( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i16.nxv4i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i16.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i16.nxv8i16( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i16.nxv8i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i16.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i16.nxv16i16( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i16.nxv16i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i16.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv16i16_nxv16i16_nxv16i16( 
%0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i32.nxv1i32( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i32.nxv1i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i32.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i32.nxv2i32( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i32.nxv2i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i32.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i32.nxv4i32( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i32.nxv4i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i32.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i32.nxv8i32( + , + , + , + i32); + +define @intrinsic_vnmsac_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i32.nxv8i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i32.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vnmsac_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, 
%3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsac_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsac_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsac_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsac_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vnmsac.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsac_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv32i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsac_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv32i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv32i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsac_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsac_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i16.i16( + , + i16, + , + i32); + 
+define @intrinsic_vnmsac_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsac_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsac_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsac_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsac_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vnmsac_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsac_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsac_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsac_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll new file mode 100644 index 0000000000000..d79c4f6deeffd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll @@ -0,0 +1,1513 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vnmsac.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vnmsac.mask.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i8.nxv2i8( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i8.nxv2i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i8.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i8.nxv4i8( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i8.nxv4i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i8.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i8.nxv8i8( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i8.nxv8i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i8.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i8.nxv16i8( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i8.nxv16i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i8.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret 
%a +} + +declare @llvm.riscv.vnmsac.nxv32i8.nxv32i8( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv32i8.nxv32i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv32i8.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i16.nxv1i16( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i16.nxv1i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i16.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i16.nxv2i16( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i16.nxv2i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i16.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i16.nxv4i16( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i16.nxv4i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i16.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vnmsac.nxv8i16.nxv8i16( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i16.nxv8i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i16.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i16.nxv16i16( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i16.nxv16i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i16.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i32.nxv1i32( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i32.nxv1i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i32.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i32.nxv2i32( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i32.nxv2i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i32.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + 
+declare @llvm.riscv.vnmsac.nxv4i32.nxv4i32( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i32.nxv4i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i32.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i32.nxv8i32( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i32.nxv8i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i32.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i64.nxv1i64( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i64.nxv1i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i64.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i64.nxv2i64( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i64.nxv2i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i64.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vnmsac.nxv4i64.nxv4i64( + , + , + , + i64); + +define @intrinsic_vnmsac_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i64.nxv4i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i64.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vnmsac_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vnmsac.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsac_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsac_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsac_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsac_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vnmsac_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsac_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv32i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsac_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv32i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv32i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsac_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsac_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call 
@llvm.riscv.vnmsac.nxv2i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsac_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsac_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv16i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsac_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv16i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv16i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsac_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare 
@llvm.riscv.vnmsac.mask.nxv1i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv2i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsac_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv2i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv2i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv4i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsac_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv4i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv4i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv8i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsac_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv8i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv8i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsac.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsac.nxv1i64.i64( + , + i64, + , + i64); + +define @intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsac.nxv1i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsac.mask.nxv1i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 
%1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu
+; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 1 x i64> @llvm.riscv.vnmsac.mask.nxv1i64.i64(
+    <vscale x 1 x i64> %0,
+    i64 %1,
+    <vscale x 1 x i64> %2,
+    <vscale x 1 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64(
+  <vscale x 2 x i64>,
+  i64,
+  <vscale x 2 x i64>,
+  i64);
+
+define <vscale x 2 x i64> @intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu
+; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}
+  %a = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64(
+    <vscale x 2 x i64> %0,
+    i64 %1,
+    <vscale x 2 x i64> %2,
+    i64 %3)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vnmsac.mask.nxv2i64.i64(
+  <vscale x 2 x i64>,
+  i64,
+  <vscale x 2 x i64>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x i64> @intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu
+; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 2 x i64> @llvm.riscv.vnmsac.mask.nxv2i64.i64(
+    <vscale x 2 x i64> %0,
+    i64 %1,
+    <vscale x 2 x i64> %2,
+    <vscale x 2 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64(
+  <vscale x 4 x i64>,
+  i64,
+  <vscale x 4 x i64>,
+  i64);
+
+define <vscale x 4 x i64> @intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu
+; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}
+  %a = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64(
+    <vscale x 4 x i64> %0,
+    i64 %1,
+    <vscale x 4 x i64> %2,
+    i64 %3)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vnmsac.mask.nxv4i64.i64(
+  <vscale x 4 x i64>,
+  i64,
+  <vscale x 4 x i64>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x i64> @intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu
+; CHECK: vnmsac.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 4 x i64> @llvm.riscv.vnmsac.mask.nxv4i64.i64(
+    <vscale x 4 x i64> %0,
+    i64 %1,
+    <vscale x 4 x i64> %2,
+    <vscale x 4 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 4 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
new file mode 100644
index 0000000000000..3c01f60e9df5a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
@@ -0,0 +1,1261 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  i32);
+
+define <vscale x 1 x i8> @intrinsic_vnmsub_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_vv_nxv1i8_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i8> %2,
+    i32 %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vnmsub.mask.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i8> @intrinsic_vnmsub_mask_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv1i8_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnmsub.mask.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i8> %2,
+    <vscale x 1 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  i32);
+
+define <vscale x 2 x i8> @intrinsic_vnmsub_vv_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_vv_nxv2i8_nxv2i8_nxv2i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}},
{{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i8.nxv2i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i8.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i8.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i8.nxv4i8( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i8.nxv4i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i8.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i8.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i8.nxv8i8( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i8.nxv8i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i8.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i8.nxv16i8( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i8.nxv16i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i8.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv32i8.nxv32i8( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv32i8.nxv32i8( + 
%0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv32i8.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i16.nxv1i16( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i16.nxv1i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i16.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i16.nxv2i16( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i16.nxv2i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i16.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i16.nxv4i16( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i16.nxv4i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i16.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i16.nxv8i16( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i16.nxv8i16( + %0, + %1, + %2, 
+ i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i16.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i16.nxv16i16( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i16.nxv16i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i16.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i32.nxv1i32( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i32.nxv1i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i32.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i32.nxv2i32( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i32.nxv2i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i32.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i32.nxv4i32( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i32.nxv4i32( + %0, + %1, + 
%2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i32.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i32.nxv8i32( + , + , + , + i32); + +define @intrinsic_vnmsub_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i32.nxv8i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i32.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vnmsub_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsub_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsub_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsub_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv4i8_i8_nxv4i8( 
%0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsub_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsub_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv32i8.i8( + , + i8, + , + i32); + +define @intrinsic_vnmsub_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv32i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv32i8.i8( + , + i8, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsub_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, 
a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsub_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsub_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsub_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i16.i16( + , + i16, + , + i32); + +define @intrinsic_vnmsub_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i16.i16( + , + i16, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret 
%a +} + +declare @llvm.riscv.vnmsub.nxv1i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsub_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsub_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsub_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i32.i32( + , + i32, + , + i32); + +define @intrinsic_vnmsub_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i32.i32( + , + i32, + , + , + i32); + +define @intrinsic_vnmsub_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll new file mode 100644 
index 0000000000000..dd9d6ec2280b6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll
@@ -0,0 +1,1513 @@
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  i64);
+
+define <vscale x 1 x i8> @intrinsic_vnmsub_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_vv_nxv1i8_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i8> %2,
+    i64 %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vnmsub.mask.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i8> @intrinsic_vnmsub_mask_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv1i8_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnmsub.mask.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i8> %2,
+    <vscale x 1 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  i64);
+
+define <vscale x 2 x i8> @intrinsic_vnmsub_vv_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_vv_nxv2i8_nxv2i8_nxv2i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> %0,
+    <vscale x 2 x i8> %1,
+    <vscale x 2 x i8> %2,
+    i64 %3)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vnmsub.mask.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x i8> @intrinsic_vnmsub_mask_vv_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv2i8_nxv2i8_nxv2i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 2 x i8> @llvm.riscv.vnmsub.mask.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> %0,
+    <vscale x 2 x i8> %1,
+    <vscale x 2 x i8> %2,
+    <vscale x 2 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  i64);
+
+define <vscale x 4 x i8> @intrinsic_vnmsub_vv_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_vv_nxv4i8_nxv4i8_nxv4i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i8> %2,
+    i64 %3)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vnmsub.mask.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x i8> @intrinsic_vnmsub_mask_vv_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv4i8_nxv4i8_nxv4i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 4 x i8> @llvm.riscv.vnmsub.mask.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i8> %2,
+    <vscale x 4 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8(
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  i64);
+
+define <vscale x 8 x i8> @intrinsic_vnmsub_vv_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_vv_nxv8i8_nxv8i8_nxv8i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu
+; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8(
+    <vscale x 8 x i8> %0,
+    <vscale x 8 x i8> %1,
+    <vscale x 8 x i8> %2,
+    i64 %3)
+
+  ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vnmsub.mask.nxv8i8.nxv8i8(
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x i8> @intrinsic_vnmsub_mask_vv_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv8i8_nxv8i8_nxv8i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu
+; CHECK: vnmsub.vv
{{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i8.nxv16i8( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i8.nxv16i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i8.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv32i8.nxv32i8( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv32i8.nxv32i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv32i8.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i16.nxv1i16( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i16.nxv1i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i16.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i16.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i16.nxv2i16( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i16.nxv2i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i16.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, 
{{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i16.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i16.nxv4i16( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i16.nxv4i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i16.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i16.nxv8i16( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i16.nxv8i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i16.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i16.nxv16i16( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i16.nxv16i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i16.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i32.nxv1i32( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i32.nxv1i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i32.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vv 
{{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i32.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i32.nxv2i32( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i32.nxv2i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i32.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i32.nxv4i32( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i32.nxv4i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i32.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i32.nxv8i32( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i32.nxv8i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i32.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i64.nxv1i64( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i64.nxv1i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i64.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, 
{{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i64.nxv2i64( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i64.nxv2i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i64.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i64.nxv4i64( + , + , + , + i64); + +define @intrinsic_vnmsub_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i64.nxv4i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i64.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vnmsub_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vv_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vnmsub.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsub_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv1i8_i8_nxv1i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i8_i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsub_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv2i8_i8_nxv2i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i8_i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret 
%a +} + +declare @llvm.riscv.vnmsub.nxv4i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsub_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv4i8_i8_nxv4i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i8_i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsub_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv8i8_i8_nxv8i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv8i8_i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsub_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv16i8_i8_nxv16i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv16i8_i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv32i8.i8( + , + i8, + , + i64); + +define @intrinsic_vnmsub_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv32i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv32i8.i8( + , + i8, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv32i8_i8_nxv32i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv32i8_i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv32i8.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsub_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vnmsub_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv1i16_i16_nxv1i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i16_i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsub_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv2i16_i16_nxv2i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i16_i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsub_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv4i16_i16_nxv4i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i16_i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsub_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv8i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv8i16_i16_nxv8i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv8i16_i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv16i16.i16( + , + i16, + , + i64); + +define @intrinsic_vnmsub_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vx 
{{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv16i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv16i16.i16( + , + i16, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv16i16_i16_nxv16i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv16i16_i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv16i16.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsub_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv1i32_i32_nxv1i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i32_i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsub_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv2i32_i32_nxv2i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i32_i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsub_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv4i32_i32_nxv4i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i32_i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv8i32.i32( + , + i32, + , + i64); + +define @intrinsic_vnmsub_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv8i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + 
+declare @llvm.riscv.vnmsub.mask.nxv8i32.i32( + , + i32, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv8i32_i32_nxv8i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv8i32_i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv8i32.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv1i64.i64( + , + i64, + , + i64); + +define @intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv1i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv1i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv1i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv2i64.i64( + , + i64, + , + i64); + +define @intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv2i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv2i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv2i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vnmsub.nxv4i64.i64( + , + i64, + , + i64); + +define @intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}} + %a = call @llvm.riscv.vnmsub.nxv4i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnmsub.mask.nxv4i64.i64( + , + i64, + , + , + i64); + +define @intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vnmsub.vx {{v[0-9]+}}, a0, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vnmsub.mask.nxv4i64.i64( + %0, + i64 %1, + %2, + %3, + i64 %4) + + ret %a +} From 13f439a1872b559d72ee2b5951395310ce4393cc Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 17 Dec 2020 13:30:18 -0500 Subject: [PATCH 058/378] [lld/mac] Implement support for private extern symbols Private extern symbols are used for things scoped to the linkage unit. They cause duplicate symbol errors (so they're in the symbol table, unlike TU-scoped truly local symbols), but they don't make it into the export trie. They are created e.g. by compiling with -fvisibility=hidden. 
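(Not part of the patch: a minimal sketch, assuming clang targeting Mach-O, of how such a private extern symbol typically arises; the file and function names are made up.)

    // helper.cpp -- hypothetical example.
    // Building with `clang -c -fvisibility=hidden helper.cpp` (or using the
    // attribute below) marks the function's symbol as private extern
    // (N_EXT | N_PEXT): it takes part in symbol resolution across object
    // files -- and can still cause duplicate symbol errors -- but is kept
    // out of the export trie.
    __attribute__((visibility("hidden")))
    int bump(int x) { return x + 1; }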
If two weak symbols have differing privateness, the combined symbol is non-private external. (Example: an inline function, where some TUs that include the header defining it were built with -fvisibility-inlines-hidden and some weren't.)

A weak private external symbol implicitly has its "weak" dropped and behaves like a regular strong private external symbol: weak is an export trie concept, and private symbols are not in the export trie.

If a weak and a strong symbol have different privateness, the strong symbol wins.

If two common symbols have differing privateness, the larger symbol wins. If they have the same size, the privateness of the symbol seen later during the link wins (!) -- this is a bit lame, but it matches ld64, and this behavior takes 2 lines less to implement than the less surprising "result is non-private external", so match ld64. (Example: `int a` in two .c files, both built with -fcommon, one built with -fvisibility=hidden and one without.)

This also makes `__dyld_private` a true TU-local symbol, matching ld64.

To make this work, make the `const char*` StringRefZ ctor correctly set `size` (without this, writing the string table crashed when calling getName() on the __dyld_private symbol).

Mention in CommonSymbol's comment that common symbols are now disabled by default in clang.

Mention in -keep_private_externs's HelpText that the flag only has an effect with `-r` (which we don't implement yet, so this patch doesn't regress any behavior around -r + -keep_private_externs). ld64 doesn't explicitly document it, but the commit text of http://reviews.llvm.org/rL216146 does, and ld64's OutputFile::buildSymbolTable() checks `_options.outputKind() == Options::kObjectFile` before calling `_options.keepPrivateExterns()` (the only reference to that function).

Fixes PR48536.
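(Likewise a hypothetical sketch, not part of the patch, of the inline-function case described above; the file and function names are made up.)

    // vis.h -- hypothetical shared header.
    inline int get() { return 1; }

    // a.cpp -- built with: clang -c a.cpp
    #include "vis.h"
    int use_a() { return get(); }  // a.o: the symbol for get() is a weak external def

    // b.cpp -- built with: clang -c b.cpp -fvisibility-inlines-hidden
    #include "vis.h"
    int use_b() { return get(); }  // b.o: the symbol for get() is a weak private external def

    // Linking a.o and b.o: per the rule above, the merged symbol becomes a
    // non-private (weak) external, matching ld64.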
Differential Revision: https://reviews.llvm.org/D93609 --- lld/MachO/Driver.cpp | 2 +- lld/MachO/InputFiles.cpp | 30 ++++-- lld/MachO/Options.td | 2 +- lld/MachO/SymbolTable.cpp | 19 ++-- lld/MachO/SymbolTable.h | 5 +- lld/MachO/Symbols.h | 35 +++++-- lld/MachO/SyntheticSections.cpp | 49 ++++++--- lld/MachO/SyntheticSections.h | 1 + lld/test/MachO/dylink-lazy.s | 2 +- lld/test/MachO/private-extern.s | 143 +++++++++++++++++++++++++++ lld/test/MachO/symtab.s | 35 ++++--- lld/test/MachO/weak-private-extern.s | 38 +++++++ 12 files changed, 299 insertions(+), 62 deletions(-) create mode 100644 lld/test/MachO/private-extern.s create mode 100644 lld/test/MachO/weak-private-extern.s diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 9838e67cd4a29..82ddcf084dc00 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -524,7 +524,7 @@ static void replaceCommonSymbols() { replaceSymbol(sym, sym->getName(), isec, /*value=*/0, /*isWeakDef=*/false, - /*isExternal=*/true); + /*isExternal=*/true, common->privateExtern); } } diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 3a4466dd123a7..e2282f1fb2bb2 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -280,22 +280,33 @@ void ObjFile::parseRelocations(const section_64 &sec, static macho::Symbol *createDefined(const structs::nlist_64 &sym, StringRef name, InputSection *isec, uint32_t value) { - if (sym.n_type & N_EXT) - // Global defined symbol - return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF); - // Local defined symbol + // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT): + // N_EXT: Global symbols + // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped + // N_PEXT: Does not occur in input files in practice, + // a private extern must be external. + // 0: Translation-unit scoped. These are not in the symbol table. + + if (sym.n_type & (N_EXT | N_PEXT)) { + assert((sym.n_type & N_EXT) && "invalid input"); + return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF, + sym.n_type & N_PEXT); + } return make(name, isec, value, sym.n_desc & N_WEAK_DEF, - /*isExternal=*/false); + /*isExternal=*/false, /*isPrivateExtern=*/false); } // Absolute symbols are defined symbols that do not have an associated // InputSection. They cannot be weak. static macho::Symbol *createAbsolute(const structs::nlist_64 &sym, StringRef name) { - if (sym.n_type & N_EXT) - return symtab->addDefined(name, nullptr, sym.n_value, /*isWeakDef=*/false); + if (sym.n_type & (N_EXT | N_PEXT)) { + assert((sym.n_type & N_EXT) && "invalid input"); + return symtab->addDefined(name, nullptr, sym.n_value, /*isWeakDef=*/false, + sym.n_type & N_PEXT); + } return make(name, nullptr, sym.n_value, /*isWeakDef=*/false, - /*isExternal=*/false); + /*isExternal=*/false, /*isPrivateExtern=*/false); } macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym, @@ -306,7 +317,8 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym, return sym.n_value == 0 ? 
symtab->addUndefined(name, sym.n_desc & N_WEAK_REF) : symtab->addCommon(name, this, sym.n_value, - 1 << GET_COMM_ALIGN(sym.n_desc)); + 1 << GET_COMM_ALIGN(sym.n_desc), + sym.n_type & N_PEXT); case N_ABS: return createAbsolute(sym, name); case N_PBUD: diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 1ab2f9109ee06..52a351836a15a 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -346,7 +346,7 @@ def bundle_loader : Separate<["-"], "bundle_loader">, def grp_object : OptionGroup<"object">, HelpText<"CREATING AN OBJECT FILE">; def keep_private_externs : Flag<["-"], "keep_private_externs">, - HelpText<"Do not convert private external symbols to static symbols">, + HelpText<"Do not convert private external symbols to static symbols (only valid with -r)">, Flags<[HelpHidden]>, Group; def d : Flag<["-"], "d">, diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index ea231c9786e26..8a490083ebf1b 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -38,7 +38,8 @@ std::pair SymbolTable::insert(StringRef name) { } Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec, - uint32_t value, bool isWeakDef) { + uint32_t value, bool isWeakDef, + bool isPrivateExtern) { Symbol *s; bool wasInserted; bool overridesWeakDef = false; @@ -46,8 +47,13 @@ Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec, if (!wasInserted) { if (auto *defined = dyn_cast(s)) { - if (isWeakDef) + if (isWeakDef) { + // Both old and new symbol weak (e.g. inline function in two TUs): + // If one of them isn't private extern, the merged symbol isn't. + if (defined->isWeakDef()) + defined->privateExtern &= isPrivateExtern; return s; + } if (!defined->isWeakDef()) error("duplicate symbol: " + name); } else if (auto *dysym = dyn_cast(s)) { @@ -57,8 +63,9 @@ Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec, // of a name conflict, we fall through to the replaceSymbol() call below. } - Defined *defined = replaceSymbol(s, name, isec, value, isWeakDef, - /*isExternal=*/true); + Defined *defined = + replaceSymbol(s, name, isec, value, isWeakDef, + /*isExternal=*/true, isPrivateExtern); defined->overridesWeakDef = overridesWeakDef; return s; } @@ -82,7 +89,7 @@ Symbol *SymbolTable::addUndefined(StringRef name, bool isWeakRef) { } Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, - uint32_t align) { + uint32_t align, bool isPrivateExtern) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); @@ -98,7 +105,7 @@ Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, // a name conflict, we fall through to the replaceSymbol() call below. 
} - replaceSymbol(s, name, file, size, align); + replaceSymbol(s, name, file, size, align, isPrivateExtern); return s; } diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h index 89c866594d725..871687f75eb7c 100644 --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -33,11 +33,12 @@ class Symbol; class SymbolTable { public: Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value, - bool isWeakDef); + bool isWeakDef, bool isPrivateExtern); Symbol *addUndefined(StringRef name, bool isWeakRef); - Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align); + Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align, + bool isPrivateExtern); Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv); diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index 80be27dd1c1fc..7f987c722a1fa 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -47,7 +47,11 @@ class Symbol { Kind kind() const { return static_cast(symbolKind); } - StringRef getName() const { return {name.data, name.size}; } + StringRef getName() const { + if (nameSize == (uint32_t)-1) + nameSize = strlen(nameData); + return {nameData, nameSize}; + } virtual uint64_t getVA() const { return 0; } @@ -80,20 +84,26 @@ class Symbol { uint32_t symtabIndex = UINT32_MAX; protected: - Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {} + Symbol(Kind k, StringRefZ name) + : symbolKind(k), nameData(name.data), nameSize(name.size) {} Kind symbolKind; - StringRefZ name; + const char *nameData; + mutable uint32_t nameSize; }; class Defined : public Symbol { public: Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef, - bool isExternal) + bool isExternal, bool isPrivateExtern) : Symbol(DefinedKind, name), isec(isec), value(value), - overridesWeakDef(false), weakDef(isWeakDef), external(isExternal) {} + overridesWeakDef(false), privateExtern(isPrivateExtern), + weakDef(isWeakDef), external(isExternal) {} bool isWeakDef() const override { return weakDef; } + bool isExternalWeakDef() const { + return isWeakDef() && isExternal() && !privateExtern; + } bool isTlv() const override { return !isAbsolute() && isThreadLocalVariables(isec->flags); } @@ -110,6 +120,7 @@ class Defined : public Symbol { uint32_t value; bool overridesWeakDef : 1; + bool privateExtern : 1; private: const bool weakDef : 1; @@ -148,14 +159,17 @@ class Undefined : public Symbol { // // The compiler creates common symbols when it sees tentative definitions. // (You can suppress this behavior and let the compiler create a regular -// defined symbol by passing -fno-common.) When linking the final binary, if -// there are remaining common symbols after name resolution is complete, the -// linker converts them to regular defined symbols in a __common section. +// defined symbol by passing -fno-common. -fno-common is the default in clang +// as of LLVM 11.0.) When linking the final binary, if there are remaining +// common symbols after name resolution is complete, the linker converts them +// to regular defined symbols in a __common section. class CommonSymbol : public Symbol { public: - CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align) + CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, + bool isPrivateExtern) : Symbol(CommonKind, name), file(file), size(size), - align(align != 1 ? align : llvm::PowerOf2Ceil(size)) { + align(align != 1 ? 
align : llvm::PowerOf2Ceil(size)), + privateExtern(isPrivateExtern) { // TODO: cap maximum alignment } @@ -164,6 +178,7 @@ class CommonSymbol : public Symbol { InputFile *const file; const uint64_t size; const uint32_t align; + const bool privateExtern; }; class DylibSymbol : public Symbol { diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 433e6aac02595..8b2ebd36e1ae8 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -362,12 +362,10 @@ void WeakBindingSection::writeTo(uint8_t *buf) const { } bool macho::needsBinding(const Symbol *sym) { - if (isa(sym)) { + if (isa(sym)) return true; - } else if (const auto *defined = dyn_cast(sym)) { - if (defined->isWeakDef() && defined->isExternal()) - return true; - } + if (const auto *defined = dyn_cast(sym)) + return defined->isExternalWeakDef(); return false; } @@ -380,7 +378,7 @@ void macho::addNonLazyBindingEntries(const Symbol *sym, in.weakBinding->addEntry(sym, section, offset, addend); } else if (auto *defined = dyn_cast(sym)) { in.rebase->addEntry(section, offset); - if (defined->isWeakDef() && defined->isExternal()) + if (defined->isExternalWeakDef()) in.weakBinding->addEntry(sym, section, offset, addend); } else if (isa(sym)) { error("cannot bind to " + DSOHandle::name); @@ -446,8 +444,10 @@ void StubHelperSection::setup() { in.got->addEntry(stubBinder); inputSections.push_back(in.imageLoaderCache); - symtab->addDefined("__dyld_private", in.imageLoaderCache, 0, - /*isWeakDef=*/false); + dyldPrivate = + make("__dyld_private", in.imageLoaderCache, 0, + /*isWeakDef=*/false, + /*isExternal=*/false, /*isPrivateExtern=*/false); } ImageLoaderCacheSection::ImageLoaderCacheSection() { @@ -555,7 +555,7 @@ void macho::prepareBranchTarget(Symbol *sym) { } } } else if (auto *defined = dyn_cast(sym)) { - if (defined->isWeakDef() && defined->isExternal()) { + if (defined->isExternalWeakDef()) { if (in.stubs->addEntry(sym)) { in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize); in.weakBinding->addEntry(sym, in.lazyPointers, @@ -570,9 +570,10 @@ ExportSection::ExportSection() void ExportSection::finalizeContents() { trieBuilder.setImageBase(in.header->addr); - // TODO: We should check symbol visibility. for (const Symbol *sym : symtab->getSymbols()) { if (const auto *defined = dyn_cast(sym)) { + if (defined->privateExtern) + continue; trieBuilder.addSymbol(*defined); hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); } @@ -710,6 +711,13 @@ void SymtabSection::finalizeContents() { } } + // __dyld_private is a local symbol too. It's linker-created and doesn't + // exist in any object file. + if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) { + uint32_t strx = stringTableSection.addString(dyldPrivate->getName()); + localSymbols.push_back({dyldPrivate, strx}); + } + for (Symbol *sym : symtab->getSymbols()) { uint32_t strx = stringTableSection.addString(sym->getName()); if (auto *defined = dyn_cast(sym)) { @@ -752,18 +760,31 @@ void SymtabSection::writeTo(uint8_t *buf) const { nList->n_strx = entry.strx; // TODO populate n_desc with more flags if (auto *defined = dyn_cast(entry.sym)) { + uint8_t scope = 0; + if (defined->privateExtern) { + // Private external -- dylib scoped symbol. + // Promote to non-external at link time. + assert(defined->isExternal() && "invalid input file"); + scope = MachO::N_PEXT; + } else if (defined->isExternal()) { + // Normal global symbol. + scope = MachO::N_EXT; + } else { + // TU-local symbol from localSymbols. 
+ scope = 0; + } + if (defined->isAbsolute()) { - nList->n_type = MachO::N_EXT | MachO::N_ABS; + nList->n_type = scope | MachO::N_ABS; nList->n_sect = MachO::NO_SECT; nList->n_value = defined->value; } else { - nList->n_type = - (defined->isExternal() ? MachO::N_EXT : 0) | MachO::N_SECT; + nList->n_type = scope | MachO::N_SECT; nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol nList->n_value = defined->getVA(); } - nList->n_desc |= defined->isWeakDef() ? MachO::N_WEAK_DEF : 0; + nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0; } else if (auto *dysym = dyn_cast(entry.sym)) { uint16_t n_desc = nList->n_desc; MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal); diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 9384df6481f05..7bca28de13869 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -327,6 +327,7 @@ class StubHelperSection : public SyntheticSection { void setup(); DylibSymbol *stubBinder = nullptr; + Defined *dyldPrivate = nullptr; }; // This section contains space for just a single word, and will be used by dyld diff --git a/lld/test/MachO/dylink-lazy.s b/lld/test/MachO/dylink-lazy.s index 8e3e0b6ae7170..a5f275e55f085 100644 --- a/lld/test/MachO/dylink-lazy.s +++ b/lld/test/MachO/dylink-lazy.s @@ -27,7 +27,7 @@ # RUN: llvm-objdump --macho --rebase %t/dylink-lazy-pie | FileCheck %s --check-prefix=PIE # CHECK-LABEL: SYMBOL TABLE: -# CHECK: {{0*}}[[#%x, IMGLOADER:]] {{.*}} __DATA,__data __dyld_private +# CHECK: {{0*}}[[#%x, IMGLOADER:]] l {{.*}} __DATA,__data __dyld_private # CHECK-LABEL: Disassembly of section __TEXT,__text: # CHECK: callq 0x[[#%x, HELLO_STUB:]] diff --git a/lld/test/MachO/private-extern.s b/lld/test/MachO/private-extern.s new file mode 100644 index 0000000000000..c8c96aa8e7b3a --- /dev/null +++ b/lld/test/MachO/private-extern.s @@ -0,0 +1,143 @@ +# REQUIRES: x86 + +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/basics.s -o %t/basics.o + +## Check that .private_extern symbols are marked as local in the symbol table +## and aren't in the export trie. 
+# RUN: %lld -dylib %t/basics.o -o %t/basics +# RUN: llvm-objdump --syms --exports-trie %t/basics | \ +# RUN: FileCheck --check-prefix=EXPORTS %s +# RUN: llvm-nm -m %t/basics | FileCheck --check-prefix=EXPORTS-NM %s +# EXPORTS-LABEL: SYMBOL TABLE: +# EXPORTS-DAG: [[#%x, FOO_ADDR:]] l {{.*}} _foo +# EXPORTS-DAG: [[#%x, BAR_ADDR:]] g {{.*}} _bar +# EXPORTS-LABEL: Exports trie: +# EXPORTS-NOT: 0x{{0*}}[[#%X, FOO_ADDR]] _foo +# EXPORTS-DAG: 0x{{0*}}[[#%X, BAR_ADDR]] _bar +# EXPORTS-NOT: 0x{{0*}}[[#%X, FOO_ADDR]] _foo +# EXPORTS-NM-DAG: (__TEXT,__cstring) non-external (was a private external) _foo +# EXPORTS-NM-DAG: (__TEXT,__cstring) external _bar + +#--- basics.s +.section __TEXT,__cstring + +.globl _foo, _bar +.private_extern _foo + +_foo: +.asciz "Foo" + +_bar: +.asciz "Bar" + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/strong-globl.s -o %t/strong-globl.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/weak-globl.s -o %t/weak-globl.o + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/strong-private.s -o %t/strong-private.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/weak-private.s -o %t/weak-private.o + +## weak + strong symbol takes privateness from strong symbol +## - weak private extern + strong extern = strong extern (for both .o orderings) +# RUN: %lld -dylib %t/weak-private.o %t/strong-globl.o -o %t/wpsg +# RUN: llvm-nm -m %t/wpsg | FileCheck --check-prefix=EXTERNAL %s +# RUN: %lld -dylib %t/strong-globl.o %t/weak-private.o -o %t/sgwp +# RUN: llvm-nm -m %t/sgwp | FileCheck --check-prefix=EXTERNAL %s +# EXTERNAL: (__TEXT,__text) external _foo +## - weak extern + strong private extern = strong private extern +## (for both .o orderings) +# RUN: %lld -dylib %t/weak-globl.o %t/strong-private.o -o %t/wgsp +# RUN: llvm-nm -m %t/wgsp | FileCheck --check-prefix=NONEXTERNAL %s +# RUN: %lld -dylib %t/strong-private.o %t/weak-globl.o -o %t/spwg +# RUN: llvm-nm -m %t/spwg | FileCheck --check-prefix=NONEXTERNAL %s +# NONEXTERNAL: (__TEXT,__text) non-external (was a private external) _foo + +## weak + weak symbol take weaker privateness +## - weak extern + weak private extern = weak extern (both orders) +# RUN: %lld -dylib %t/weak-private.o %t/weak-globl.o -o %t/wpwg +# RUN: llvm-nm -m %t/wpwg | FileCheck --check-prefix=WEAK-EXTERNAL %s +# RUN: %lld -dylib %t/weak-globl.o %t/weak-private.o -o %t/wgwp +# RUN: llvm-nm -m %t/wgwp | FileCheck --check-prefix=WEAK-EXTERNAL %s +# WEAK-EXTERNAL: (__TEXT,__text) weak external _foo +## - weak private extern + weak private extern = weak private extern +# RUN: %lld -dylib %t/weak-private.o %t/weak-private.o -o %t/wpwp +# RUN: llvm-nm -m %t/wpwp | FileCheck --check-prefix=NONEXTERNAL %s + +#--- strong-globl.s +.globl _foo +_foo: + retq + +#--- weak-globl.s +.globl _foo +.weak_definition _foo +_foo: + retq + +#--- strong-private.s +.private_extern _foo +.globl _foo +_foo: + retq + +#--- weak-private.s +.private_extern _foo +.globl _foo +.weak_definition _foo +_foo: + retq + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/comm-small.s -o %t/comm-small.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/comm-large.s -o %t/comm-large.o + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/comm-small-private.s -o %t/comm-small-private.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/comm-large-private.s -o %t/comm-large-private.o + +## For common symbols the larger one wins. 
+## - smaller private extern + larger extern = larger extern +# RUN: %lld -dylib %t/comm-small-private.o %t/comm-large.o -o %t/cspcl +# RUN: llvm-nm -m %t/cspcl | FileCheck --check-prefix=COMMON-EXTERNAL %s +# RUN: %lld -dylib %t/comm-large.o %t/comm-small-private.o -o %t/clcsp +# RUN: llvm-nm -m %t/clcsp | FileCheck --check-prefix=COMMON-EXTERNAL %s +# COMMON-EXTERNAL: (__DATA,__common) external _foo +## - smaller extern + larger private extern = larger private extern +# RUN: %lld -dylib %t/comm-large-private.o %t/comm-small.o -o %t/clpcs +# RUN: llvm-nm -m %t/clpcs | FileCheck --check-prefix=COMMON-NONEXTERNAL %s +# RUN: %lld -dylib %t/comm-small.o %t/comm-large-private.o -o %t/csclp +# RUN: llvm-nm -m %t/csclp | FileCheck --check-prefix=COMMON-NONEXTERNAL %s +# COMMON-NONEXTERNAL: (__DATA,__common) non-external (was a private external) _foo + +# For common symbols with the same size, the privateness of the symbol seen +# later wins (!). +## - equal private extern + equal extern = equal extern (both orders) +# RUN: %lld -dylib %t/comm-small-private.o %t/comm-small.o -o %t/cspcs +# RUN: llvm-nm -m %t/cspcs | FileCheck --check-prefix=COMMON-EXTERNAL %s +## - equal extern + equal private extern = equal private extern (both orders) +# RUN: %lld -dylib %t/comm-small.o %t/comm-small-private.o -o %t/cscsp +# RUN: llvm-nm -m %t/cscsp | FileCheck --check-prefix=COMMON-NONEXTERNAL %s +## - equal private extern + equal private extern = equal private extern +# RUN: %lld -dylib %t/comm-small-private.o %t/comm-small-private.o -o %t/cspcsp +# RUN: llvm-nm -m %t/cspcsp | FileCheck --check-prefix=COMMON-NONEXTERNAL %s + +#--- comm-small.s +.comm _foo,4,2 + +#--- comm-large.s +.comm _foo,8,3 + +#--- comm-small-private.s +.private_extern _foo +.comm _foo,4,2 + +#--- comm-large-private.s +.private_extern _foo +.comm _foo,8,3 diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index 222a35798909e..d18986c9d91c0 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -8,7 +8,7 @@ # RUN: llvm-readobj --syms --macho-dysymtab %t/test | FileCheck %s # CHECK: Symbols [ # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _local (2) +# CHECK-NEXT: Name: _local # CHECK-NEXT: Type: Section (0xE) # CHECK-NEXT: Section: __data (0x4) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) @@ -17,48 +17,47 @@ # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _main (9) -# CHECK-NEXT: Extern +# CHECK-NEXT: Name: __dyld_private # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: Section: __data (0x4) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _external (55) +# CHECK-NEXT: Name: _main # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: Section: __text (0x1) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _external_weak (65) +# CHECK-NEXT: Name: _external # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: Section: __data (0x4) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) -# CHECK-NEXT: Flags [ (0x80) -# CHECK-NEXT: WeakDef (0x80) +# CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { 
-# CHECK-NEXT: Name: __dyld_private (103) +# CHECK-NEXT: Name: _external_weak # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: Section: __text (0x1) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) -# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: Flags [ (0x80) +# CHECK-NEXT: WeakDef (0x80) # CHECK-NEXT: ] # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: dyld_stub_binder (15) +# CHECK-NEXT: Name: dyld_stub_binder # CHECK-NEXT: Extern # CHECK-NEXT: Type: Undef (0x0) # CHECK-NEXT: Section: (0x0) @@ -68,7 +67,7 @@ # CHECK-NEXT: Value: 0x0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _dynamic (80) +# CHECK-NEXT: Name: _dynamic # CHECK-NEXT: Extern # CHECK-NEXT: Type: Undef (0x0) # CHECK-NEXT: Section: (0x0) @@ -81,9 +80,9 @@ # CHECK-NEXT: ] # CHECK-NEXT: Dysymtab { # CHECK-NEXT: ilocalsym: 0 -# CHECK-NEXT: nlocalsym: 1 -# CHECK-NEXT: iextdefsym: 1 -# CHECK-NEXT: nextdefsym: 4 +# CHECK-NEXT: nlocalsym: 2 +# CHECK-NEXT: iextdefsym: 2 +# CHECK-NEXT: nextdefsym: 3 # CHECK-NEXT: iundefsym: 5 # CHECK-NEXT: nundefsym: 2 diff --git a/lld/test/MachO/weak-private-extern.s b/lld/test/MachO/weak-private-extern.s new file mode 100644 index 0000000000000..78fb4260999c6 --- /dev/null +++ b/lld/test/MachO/weak-private-extern.s @@ -0,0 +1,38 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o +# RUN: %lld -dylib %t.o -o %t.dylib -lSystem +# RUN: llvm-objdump --macho --weak-bind %t.dylib | FileCheck %s +# CHECK-NOT: __got +# CHECK-NOT: __la_symbol_ptr + +# RUN: llvm-objdump --macho --all-headers %t.dylib | \ +# RUN: FileCheck --check-prefix=HEADERS %s +# HEADERS-NOT: WEAK_DEFINES +# HEADERS-NOT: BINDS_TO_WEAK + +## Check that N_WEAK_DEF isn't set in the symbol table. +## This is different from ld64, which makes private extern weak symbols non-weak +## for binds and relocations, but it still marks them as weak in the symbol table. +## Since `nm -m` doesn't look at N_WEAK_DEF for N_PEXT symbols this is not +## observable, but it feels slightly more correct. +# RUN: llvm-readobj --syms %t.dylib | FileCheck --check-prefix=SYMS %s +# SYMS-NOT: WeakDef (0x80) + +.globl _use +_use: + mov _weak_private_extern_gotpcrel@GOTPCREL(%rip), %rax + callq _weak_private_extern + retq + +.private_extern _weak_private_extern +.globl _weak_private_extern +.weak_definition _weak_private_extern +_weak_private_extern: + retq + +.private_extern _weak_private_extern_gotpcrel +.globl _weak_private_extern_gotpcrel +.weak_definition _weak_private_extern_gotpcrel +_weak_private_extern_gotpcrel: + .quad 0x1234 From dbb01536f6f49fa428f170e34466072ef439b3e9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 21 Dec 2020 15:24:45 -0800 Subject: [PATCH 059/378] scan-view: Remove Reporter.py and associated AppleScript files I'm not exactly sure what this is, but it appears to be a tool for reporting internal issues at Apple. These files haven't been meaningfully updated in 12 years, and it doesn't seem like there is any reason to keep them in tree. 
Reviewed By: NoQ Differential Revision: https://reviews.llvm.org/D93565 --- clang/tools/scan-view/CMakeLists.txt | 3 - clang/tools/scan-view/share/FileRadar.scpt | Bin 18418 -> 0 bytes .../scan-view/share/GetRadarVersion.scpt | 0 clang/tools/scan-view/share/Reporter.py | 251 ------------------ 4 files changed, 254 deletions(-) delete mode 100644 clang/tools/scan-view/share/FileRadar.scpt delete mode 100644 clang/tools/scan-view/share/GetRadarVersion.scpt delete mode 100644 clang/tools/scan-view/share/Reporter.py diff --git a/clang/tools/scan-view/CMakeLists.txt b/clang/tools/scan-view/CMakeLists.txt index 22edb974bac7e..dd3d33439299a 100644 --- a/clang/tools/scan-view/CMakeLists.txt +++ b/clang/tools/scan-view/CMakeLists.txt @@ -5,10 +5,7 @@ set(BinFiles set(ShareFiles ScanView.py - Reporter.py startfile.py - FileRadar.scpt - GetRadarVersion.scpt bugcatcher.ico) if(CLANG_INSTALL_SCANVIEW) diff --git a/clang/tools/scan-view/share/FileRadar.scpt b/clang/tools/scan-view/share/FileRadar.scpt deleted file mode 100644 index 1c7455285ccb0a7614d9c5a8da2a8a689ca68ff9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18418 zcmeHv2b5IBvv*YwaV|Z}AVDQKf=h;7auOvif+8S6R79N(z``bH7Z5RFz#K3~K)`?s zh*?q0Ip>^n&RM(OuWs+`EPoe-@4R=;_s)3>Q`6nIyQ;dnI#gBP8J(;woH~4RqF2ve zx%!C_QN1h?VlotAB*-Gob5X8;^$gX+lls)a0+`~kFhUKfkp+m1#WVbgqsH=g6kj{` zOcu?rDo<8al+7zCAvz9E&!MjPv(%WlTx+BLUxEBZjdH18GBs;Hk&(ZZZlg5a^C)U4 z3fuuTu>d9&Hmi?1F$-YI!hD3XsHp|uUdS^=-;tkX?PA!%3<{50i z1@ap;%%w=Gs1hBcX85$C9W6k;gU~!mJJL?Hv-~Q*SR~Bwu*_0xYNH^OP?Q4s8UJgz zved>Chdkt`i2S7D-X!IYrUR7@pEPeNHCJ}HzHP&t)S1z;JTlu=ckW&7h<3b=~H zdfKW4?JL8QnJ6tluBzr~MRje(IhIBNV+C55Q+`Ojl%}9&RhUkIk0M|x!1H3%%HSwO zUOD$1mXT7?+@dh0y{M$4sjHD6RLOpYlE^PQDm&tnpj|CMILJFe4(+Lf1qgL(Cu&Q( zQMLszeIdi0A>i++V-%oXw4gkHj-`%XoESIxE+XF%Tgvvsr^EYgAm47La2+x#@=YM$ zQ2SiUDqUDmURqIJR#aA1mMksOf|2Qh@hC8~EBNe0oh?AfX98rq5av=B&u0P*cBAeV zAmnpb+D*Qu9#McapFKRE=$(8Ok*~;D>h5`g0OZR+zTAw@t{FbR2;>XuoJ-AW@j1UJ zRasnKre!s(?(fSCbq2RRDaQhY+;*ZKluNxVKq%EtV6!*%u>c{rovE9APJN>QX>R*^ zZqY~iEFzy##8My65yT*$2J-1<-1f?F`$-_5Q1@JF4sJ`5m6gRaiwlxf;I*K*`EWCi2WB|_AdnAe$6Sh*F06nE%gc%jH9av*j~!*GKe*eShFXA- zyM8o;_Mkm2K*(J`ur-W^TY!+e{xnG5rx8(rG)4*{1nX_8VW8)(kKfMa=AN=pgbCF0YWZ! 
z2YY+b7z+?`Ih2OU+cY)`kmho%=MsIBw<7YEZZ=~)C+NPs8OWQPaXC7}SAbb+Q$NfJdUJ&X&miq0YV-}Vg(vc6D&Z;<0u*{uhYaR zK$^#ip2s{IBdt|rhVd6}j~0n%Je^IT1&{p6*HyrlcW{+=?r zBQFN>;$~b;$#C^TATLn!Txz7VyKH9hEKQ9JQwDSm9Y6k@9dL4{zhPQi|qfRBi#n=10;Zc_@&FwrjO72a*ap z+yaD17SU1iU?2}}_ab~a5aHNh0YXGa(b4iiAP;PHZLCD2Dw<~j!bV5aF>-$(_ixjZ zH4lyF(*g?+Ha>=qmHPs@ZyQ#U1!#H%Ewlh((_`s)xi^q|>zv>VQ5mwe0Ac0h=_J_{ z$fhmMyG3Yll-y(KDBs{DIz{ddj<*0cHE)$$)eZYS>Ms#gzrgf~xsy zwu&4_BIl5%S*}t7tCheNe*b|w)3PjD##c82*8k?eNyA6Ms$)u)6vKW*Y7BVgCgY0A zE0U?g#GsLV)Yh&2_h7}6jbplnB`X?FN){$lJz;*=uCeWDF0CgI-FH1^tp5}I5j9Sv z^VQ!?hF4USR3EP^VSz#p4_5p-1W)6de(3(#Zb&m^Otb?0cc{(&;M$D`*J3yZJGyZ# zv$H1-0~Pp}1@o&3PHIIk4l7WW!mgjd(RUOHlR*fBJqP2c3(sZvETg3{ zcFyc%*{qzxWK}XzQeIg(B++Lmtb>{5snTRwLD7&z-=R~-=FF>1<`6E+m(oHAcgwc>NDPEpMPLteNf-~2WFx)1&*|S0ch&ezIZwnC-xjrQO`v_ z&QK-zq^b3<=0d#%n%X2c{{)bbt7${n=HOU`dV@`-C_Yk z`tV-K0pBZ1YKyAGyK;N8V ziK^7RqC|Nrp%{w10^OtvbZeY$g@;PjAq7FIV(6eHAlxJd25Mz!J?h^^w_AX42DYMB za;aP*7qeWdlkXA*7wZhX9iKbsP74rrP-|X9chN@17pB%-7?=DW37rvBU^pi_Vw%A% zfytl(N+eT7iL!Fcow8X)sl>d>qQXRRSt48&68(GjNhHe(6D7&2qEyes*qMn6cnXRN zyC*Pj7KTlWXO)$wiVAxU_Wj$a{kuC(chf_mf~VDS3KUxvR2?#Y;><@*6;72fGeLB< z(pore1FDLv+H1{3sF+3JT2_qGM6H!C0n@)HCP4!7D+_DrWULC?^+f4dNLJlt-nphY zx+CvZdtunhQ&ne}Qr27{Fb+0!C;D{{ZL$D0(!G%GrTbVeRO#M}s)trDT%*$>#&R`i zkgiu#W*1jNW+^zQAi)GO+a&7liQ=kGm5H=$y=d=K(cT}Y`)LzcuD$k*4p~+YU|Pt2 zof(8hKfTgSfff{_l^27qEILQMh!WLL{`-Wk;?<$;6jJ;Tu^M#1&;#_K1*j3~1<;s> zSuRkaK7_^v)k2+ITUq}{1pBZG_K`R}f`6L}b}~d%x1RogF5m|t;792(3s58A^XYMV zg5`V_@Z&HfxQExPC$#%d#_369dl-5QUr*7~7647v8Q)6IlXGP$%XzxyoU34|E<8`; z^9(&}0m5~(70w;c(eoA{Tu0%?mvbUG7vd=Rd}jGPJCL)(!)*S%S;d9LWmVJ&sj;KR zmQ@*g7C2v^7cD@Dvo##=FVV{uAjApxF})I_SCr>h+-OnS>A5u)6M5^_dV&e&|Zf772{I?zUX+tAxU^A5dh0Ya`iVEuTH z-nRfIA3IhD>WZ+yhZdkZ-Jl(%2w^eeWEe~KUhzg_LP0!3>~b^cVeY0m6g zhi*))7J}}Ry3dW$kd$RmP+Ws)h@2S6iCeLXEg_bvr!IJoxj~E@DDsAJZskoE`~ z$*~wC$hd*C;+zF%ik^?7mV*ra#epo&v^Crq^qX?ra#K%#FPg~Bcn8bPJpH|BJU8bS zmYb*ZJpAh1CdPk{WKSm8%O_jE7qumd?-N5j@iJ2u}mPd07z1 zf(#AIBRy}33G=8Jk5b<9;+zM6g)YD8U~xFo!#$Qc@W0OwWd0U>QM~?UB zX_oi*0$NOG@d13GBR-=7T1-p$5I)p0LQcBQz#lD# z2Xc6ZhUG&&Z*ao%VKF{Tc}vDQiL=in)i+p4>eiSCH$(AeE`|RP(F{*v3H;O*fmCd_ z32%A2XXF$*o%3UyuZ$GLxxfqQz>JW}11ZmR!0ee#4jc0SQ<=I{|%jg!K!*eaqN#}3m5-zoj)3Aze8N5GbTyAmyr8<8VSMcGM zE7JK@oZ?E$)!~_Cv|Q!}GB?ASXRi`W2!oL^taUK(_-HAV0?SAH4r$3Te5_@JV$`X7KHT2< zQOQSl!V64%tS5OMoiE8il67*;Qm6(@fdbsOGJ>m72KE5#h`0Qie-kg`;|w1MKrsR~ zatJTslcRD7_|@3EcD4#9|$^n*7_d-I6ObcXM9lI0@CJV~}iu5!f-Jj3kGo!MFwtL-T*;h#-gxG-31IfMl0=_URdxK|kdNkz`I4xN2J(vhDlBFfd*m>7`BKS~QI;?D$kqHE8OX?O zIHI!TDUnf1UB6 zhbdx&mt+r2x#dyWLlKu0U^JF_#IUJ(g$$M5EwAv1Q6fVE8M2)lEz3|PxVsWWNJ9qm zl`<$QgF&zmUR=vpdh{^TWndr!x9Z+)AcJ%jKG8QpZ)HFr11OeD^+T6N_=WpOjPNb5 z8R zD1$4xxCC(u_+Q~mTu@w6l9&xAbRq>WZFy;8Rx*{JoCTk5MMY6rA-u96N-@4hSLSQu zd~N6koe-Le>T1>~fcN8jXQ=lNE(ps@U`}e(LET>oZ1vu;s*Z>RPQL1<%?DK1tU6sa z?4%I@Jr}`oYWOO2XboRyc@0G{^XNNn&)4$}yw)-t^1AK*NPqA;UT=9F8LY<0|He1+ z28;Mns6s!|PtrS(-v5xRg=HS@J3rB{(km*xFo^2Cwum%^Ep3a~>OHbV*dgz!V9D$p@u-Du3O@?nmCvWCkEZ-b<3B_@~m2b0r zYq(({pT)QH9hPqoFX?cEv!pxU$#+GiyQ+HC5mlD&3_HV(IgoA<>4qD8e3vh3!ZGO@ zk**?^?+Cl5jsoctNS8WvM*00rMLLRv=vV zSfgM%x97Vp-yI%eklU5-;Z2tBN$1=0y?meLd(-*Wd_O;6`Tlgi6+g%iS$@#--i%xD z!!cIZDnAnEN1%wEEkERY(Sln_$3Qx6aeyB1NLumE{Ai3HRV0ta`7!L3>R4{C|196< zk+tSFk{w9)zjWC0CXci&C;0IgKdwlhi1QN&z=jB^i;+$`7ZH`>C;2JMPlmIWJ8%ct zO*%+>mfavH>7by!)_e+|r}-JnPy6xj0Gsn!e$Fzym^dk-Wlw&dU$FdqI^UIFu%ie>D`n$=lf$W+&D_MTUbG;jP;#Xt*s&f5WoL_?nB7IUU^rya;yf`{@ z7fA$?s6#_u@Tj_S4}LwyuPdrI;{1j`uEA%U_BZSj$SxTc3_pWzzR7P{e$xx27x$8O z(pK8Av{PBMRnSHS@)kaC^E;N`4iDVii;v@Xr8U3D@5|1X-}SR+7%$=v48vE-AM!_* zKlIWY#*_GC{>1XfUOK~I!F|e~S^hMgAIG2b7nVOy=kxeW{>t)~zURYuxa<_jP8rrM 
zf91P8oJaB3G5%V+{7sy{L3D07q7xdEEWj$KYgTx2{z5xgpYwPh{#JGr_=>*uGg(W% zO9u_&HRJsVl3}1EdptgIpTirALQ|9X&y-PEu2bsR3Wy~ zH3qIv+4tDvaoRGJgh=~F-#v={*z+#m8fz;28kTrOrLr1ohD@-(Iq8M2d#7zKgOyHjy)s@(fOQ4skji#Z;bU0T@ zy+G>KFl$YHk7ynrVVcAY{Hq&`jT;-Ph>2wS7zqULp47ZtAq{XpLt1A<|DMMKPnej9 z7`49M_e0!|hnSeKkTo$s#9ESNnp%?;T3_6s2Rn&6!4QPjmuc$R?Z^F{3!K}+I5Gr) zD<%LttJo1xF>%w(nz&zE7V~2FxBJWe$?k7da(^lK6EzLIx@HH{+?pNy04?S-ObgS} znihTl7V~MQmD$mnR_Xi_vy<7`nw>mfu;ttzf%_vP4{PAi)d4x4Pcp4zrnSnkP299W zXk$2*1b>)G;L<`70;>z)?a-Sv7#g^2$H8a0FD~Pb#Kr48zTV|6n%(?}E$8L#JNK>ohTV5MSl=r6Mn^0g zpN{TpYdZQ9#&TY7I$6`nkI!;mXF8iM)^twiuQOduH*314^Vgd0riV4%)A`k=r^&IV zr{{1vuQa(a1E0tS(<^Rzfwj)o9g4h z!*uh=uH`lE%fNk^88BT z9WI@~y}6y{dEnmIimxMZuWy61B5<$$tGg_4uh!x33fwDO@KXft<+`^q_mU3$iwa)A zz?(tTu&H~V8oB4(vnJ1scF#nS-P)KvSL+1}g1NU^TCr!<0YU!SGPf`o_ zgnOLYxyRh2?h*H}d&oWL9&q=&``o>5le@>=?KZl*+?`SPRP}}&pb?HY@KW>mz&()} z^a$`Yin_;8PfxCadvqIh;F0apfrqzK2O{nvsu#Eix1tCU_W;!o+_t>45OMcWgTUSU zuPPC7o2X&n?x|BR?%sl4MBGMd9JsscRF1&i`7iS^?w+CZSG?Cf@A(NI}AHdTLWab{%S$Y;R<3scZ(Stako(8m>G*} z$?j%%liT2KbnD$Zx7OX@t`FSJ+o)(aZI_~L*iJKk&vB0h0rp0F6ziL+C*4C+1 zH*7(v0(X6#`eYp58fj|wqPV-xt#Q}7YuwfDD!1CLax2}HZiQR!mbok3yrt1`ZNn`C^|HpqB&oibjvMH#QGQ^tY2a=ZQefm^XvH*w&Whg|&A z0~xqwndLZeSELQHz+L{2V|w5&`xnnv;4amLKD1+-nlUttmbpuCL+&%&?)n8z)q323 zad(B=aA)ocvuz-pA*11InF4D$-(AdAJl|c!i`<3o0(ZVU&z zarF)rbr)6dK~Z-hQW_MCx(hUg*j3b>uTK$hin{Z{R9R`%or{zPY@%){QW{5zx^s}y zphwi5t@RL@h`O^hg-}A&or#qC_#^HNY94i`SDRCH#pOoOS7(12roPI)D*Nj&^-cEG+24jKY#-UzWG5+Jookl861g3!bIr5Y zAlE{3t*G1FYm?Nnd*0mHZOGn`q*mE$@o>j%lYlhpp2Bz0Jsq}{GdQg##Sn73&{b850KNu4_8ZJb7( z4#1o7UDi!cQdewZr3W8W+&k~y2}$a9J;-#=UJuwKJ8J@%=$X9^nH*x# z@j7x(xIPuD#-QtPKmk(}|NfsmA!t|aRF9e6RV4ew%|2jn1X!MhF6gTc`lgA$te+3} zie4JgOZ$4=KLMg;NSC1eNU)!Z8Qc}xV8+GGIM`SR#LOO=-8XLbMRq_8uU8{GK5oYA zS$vpgCdBbJQoGQFqrb|L05<=-8hUH1`t6jM!8N80W@6k-kNi%({6tCJLSOZR1UGs3%gK> z{HTyV<^W#s(N_ZW=R|Ta%GwLx%_8J-(53{Tyi%m}=DOn0dj1DT{EJ2UDuDJUr -%s: - -"""%(self.getName(),r.getName(),self.getName(),self.getValue(r,bugtype,getConfigOption)) - -class SelectionParameter (ReporterParameter): - def __init__(self, n, values): - ReporterParameter.__init__(self,n) - self.values = values - - def getHTML(self,r,bugtype,getConfigOption): - default = self.getValue(r,bugtype,getConfigOption) - return """\ - -%s:"""%(self.getName(),r.getName(),self.getName(),'\n'.join(["""\ -"""%(o[0], - o[0] == default and ' selected="selected"' or '', - o[1]) for o in self.values])) - -#===------------------------------------------------------------------------===# -# Reporters -#===------------------------------------------------------------------------===# - -class EmailReporter(object): - def getName(self): - return 'Email' - - def getParameters(self): - return [TextParameter(x) for x in ['To', 'From', 'SMTP Server', 'SMTP Port']] - - # Lifted from python email module examples. - def attachFile(self, outer, path): - # Guess the content type based on the file's extension. Encoding - # will be ignored, although we should check for simple things like - # gzip'd or compressed files. - ctype, encoding = mimetypes.guess_type(path) - if ctype is None or encoding is not None: - # No guess could be made, or the file is encoded (compressed), so - # use a generic bag-of-bits type. 
- ctype = 'application/octet-stream' - maintype, subtype = ctype.split('/', 1) - if maintype == 'text': - fp = open(path) - # Note: we should handle calculating the charset - msg = MIMEText(fp.read(), _subtype=subtype) - fp.close() - else: - fp = open(path, 'rb') - msg = MIMEBase(maintype, subtype) - msg.set_payload(fp.read()) - fp.close() - # Encode the payload using Base64 - encoders.encode_base64(msg) - # Set the filename parameter - msg.add_header('Content-Disposition', 'attachment', filename=os.path.basename(path)) - outer.attach(msg) - - def fileReport(self, report, parameters): - mainMsg = """\ -BUG REPORT ---- -Title: %s -Description: %s -"""%(report.title, report.description) - - if not parameters.get('To'): - raise ReportFailure('No "To" address specified.') - if not parameters.get('From'): - raise ReportFailure('No "From" address specified.') - - msg = MIMEMultipart() - msg['Subject'] = 'BUG REPORT: %s'%(report.title) - # FIXME: Get config parameters - msg['To'] = parameters.get('To') - msg['From'] = parameters.get('From') - msg.preamble = mainMsg - - msg.attach(MIMEText(mainMsg, _subtype='text/plain')) - for file in report.files: - self.attachFile(msg, file) - - try: - s = smtplib.SMTP(host=parameters.get('SMTP Server'), - port=parameters.get('SMTP Port')) - s.sendmail(msg['From'], msg['To'], msg.as_string()) - s.close() - except: - raise ReportFailure('Unable to send message via SMTP.') - - return "Message sent!" - -class BugzillaReporter(object): - def getName(self): - return 'Bugzilla' - - def getParameters(self): - return [TextParameter(x) for x in ['URL','Product']] - - def fileReport(self, report, parameters): - raise NotImplementedError - - -class RadarClassificationParameter(SelectionParameter): - def __init__(self): - SelectionParameter.__init__(self,"Classification", - [['1', 'Security'], ['2', 'Crash/Hang/Data Loss'], - ['3', 'Performance'], ['4', 'UI/Usability'], - ['6', 'Serious Bug'], ['7', 'Other']]) - - def saveConfigValue(self): - return False - - def getValue(self,r,bugtype,getConfigOption): - if bugtype.find("leak") != -1: - return '3' - elif bugtype.find("dereference") != -1: - return '2' - elif bugtype.find("missing ivar release") != -1: - return '3' - else: - return '7' - -class RadarReporter(object): - @staticmethod - def isAvailable(): - # FIXME: Find this .scpt better - path = os.path.join(os.path.dirname(__file__),'../share/scan-view/GetRadarVersion.scpt') - try: - p = subprocess.Popen(['osascript',path], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except: - return False - data,err = p.communicate() - res = p.wait() - # FIXME: Check version? Check for no errors? 
- return res == 0 - - def getName(self): - return 'Radar' - - def getParameters(self): - return [ TextParameter('Component'), TextParameter('Component Version'), - RadarClassificationParameter() ] - - def fileReport(self, report, parameters): - component = parameters.get('Component', '') - componentVersion = parameters.get('Component Version', '') - classification = parameters.get('Classification', '') - personID = "" - diagnosis = "" - config = "" - - if not component.strip(): - component = 'Bugs found by clang Analyzer' - if not componentVersion.strip(): - componentVersion = 'X' - - script = os.path.join(os.path.dirname(__file__),'../share/scan-view/FileRadar.scpt') - args = ['osascript', script, component, componentVersion, classification, personID, report.title, - report.description, diagnosis, config] + [os.path.abspath(f) for f in report.files] -# print >>sys.stderr, args - try: - p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except: - raise ReportFailure("Unable to file radar (AppleScript failure).") - data, err = p.communicate() - res = p.wait() - - if res: - raise ReportFailure("Unable to file radar (AppleScript failure).") - - try: - values = eval(data) - except: - raise ReportFailure("Unable to process radar results.") - - # We expect (int: bugID, str: message) - if len(values) != 2 or not isinstance(values[0], int): - raise ReportFailure("Unable to process radar results.") - - bugID,message = values - bugID = int(bugID) - - if not bugID: - raise ReportFailure(message) - - return "Filed: %d"%(bugID,bugID) - -### - -def getReporters(): - reporters = [] - if RadarReporter.isAvailable(): - reporters.append(RadarReporter()) - reporters.append(EmailReporter()) - return reporters - From ec17c4f0755bdc37e6788113909368a63d0a3b97 Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Tue, 22 Dec 2020 10:56:43 +0800 Subject: [PATCH 060/378] [CSKY 3/n] Add bare-bones C-SKY MCTargetDesc Add basis of CSKY MCTargetDesc and it's enough to compile and link but doesn't yet do anything particularly useful. Once an ASM parser and printer are added in the next two patches, the whole thing can be usefully tested. 
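As a rough illustration of what "compile and link" buys at this stage, the
sketch below shows how a client could pull the newly registered MC components
back out of the TargetRegistry. It is not part of the patch: the helper name is
invented, the bare "csky" triple string is assumed to resolve to the target
registered in CSKYTargetInfo.cpp, and the MCCodeEmitter/MCAsmBackend factories
are left out only because they additionally need an MCContext.

  // Hypothetical client code, not from the patch. Assumes LLVM was configured
  // to build the experimental CSKY target and that LLVMInitializeCSKYTargetInfo()
  // and LLVMInitializeCSKYTargetMC() have already run.
  #include "llvm/MC/MCAsmInfo.h"
  #include "llvm/MC/MCInstrInfo.h"
  #include "llvm/MC/MCRegisterInfo.h"
  #include "llvm/MC/MCTargetOptions.h"
  #include "llvm/Support/TargetRegistry.h"
  #include <memory>
  #include <string>

  bool canCreateCSKYMCDesc() {
    std::string Err;
    const llvm::Target *T = llvm::TargetRegistry::lookupTarget("csky", Err);
    if (!T)
      return false; // Target not built in or its initializers were not called.

    // These calls go through the hooks wired up by the RegisterMC* calls in
    // CSKYMCTargetDesc.cpp and hand back the tablegen-generated register and
    // instruction tables plus the assembly properties.
    std::unique_ptr<llvm::MCRegisterInfo> MRI(T->createMCRegInfo("csky"));
    if (!MRI)
      return false;
    std::unique_ptr<llvm::MCInstrInfo> MII(T->createMCInstrInfo());
    std::unique_ptr<llvm::MCAsmInfo> MAI(
        T->createMCAsmInfo(*MRI, "csky", llvm::MCTargetOptions()));
    return MII && MAI;
  }

Anything beyond constructing these objects, such as printing or parsing CSKY
assembly, has to wait for the InstPrinter and AsmParser patches that follow.
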
Differential Revision: https://reviews.llvm.org/D93372 --- llvm/lib/Target/CSKY/CMakeLists.txt | 2 + llvm/lib/Target/CSKY/CSKYInstrInfo.td | 6 +- .../Target/CSKY/MCTargetDesc/CMakeLists.txt | 15 ++++ .../CSKY/MCTargetDesc/CSKYAsmBackend.cpp | 69 ++++++++++++++++++ .../Target/CSKY/MCTargetDesc/CSKYAsmBackend.h | 39 ++++++++++ .../CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp | 45 ++++++++++++ .../CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp | 25 +++++++ .../Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h | 29 ++++++++ .../CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp | 71 +++++++++++++++++++ .../CSKY/MCTargetDesc/CSKYMCCodeEmitter.h | 61 ++++++++++++++++ .../CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp | 62 ++++++++++++++++ .../CSKY/MCTargetDesc/CSKYMCTargetDesc.h | 48 +++++++++++++ .../Target/CSKY/TargetInfo/CSKYTargetInfo.cpp | 5 -- 13 files changed, 469 insertions(+), 8 deletions(-) create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CMakeLists.txt create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h diff --git a/llvm/lib/Target/CSKY/CMakeLists.txt b/llvm/lib/Target/CSKY/CMakeLists.txt index 390b8ea4c8ce5..ec487ed34bbf9 100644 --- a/llvm/lib/Target/CSKY/CMakeLists.txt +++ b/llvm/lib/Target/CSKY/CMakeLists.txt @@ -4,6 +4,7 @@ set(LLVM_TARGET_DEFINITIONS CSKY.td) tablegen(LLVM CSKYGenRegisterInfo.inc -gen-register-info) tablegen(LLVM CSKYGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM CSKYGenMCCodeEmitter.inc -gen-emitter) add_public_tablegen_target(CSKYCommonTableGen) @@ -22,3 +23,4 @@ add_llvm_target(CSKYCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td index afc82437e649d..7add217530e19 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td @@ -24,17 +24,17 @@ include "CSKYInstrFormats.td" class oimm : Operand, ImmLeaf(Imm - 1);"> { - let EncoderMethod = "getOImmOpValue<"#num#">"; + let EncoderMethod = "getOImmOpValue"; } class uimm : Operand, ImmLeaf(Imm);"> { - let EncoderMethod = "getImmOpValue<"#num#", "#shift#">"; + let EncoderMethod = "getImmOpValue<"#shift#">"; } class simm : Operand, ImmLeaf(Imm);"> { - let EncoderMethod = "getImmOpValue<"#num#", "#shift#">"; + let EncoderMethod = "getImmOpValue<"#shift#">"; } def nimm_XFORM : SDNodeXForm +CSKYAsmBackend::createObjectTargetWriter() const { + return createCSKYELFObjectWriter(); +} + +unsigned int CSKYAsmBackend::getNumFixupKinds() const { return 1; } + +void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved, + const MCSubtargetInfo *STI) const { + return; +} + +bool CSKYAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + return false; +} + +void CSKYAsmBackend::relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const { 
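+  // fixupNeedsRelaxation() above always answers false, so nothing should ever
+  // ask this backend to relax an instruction yet.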
+ llvm_unreachable("CSKYAsmBackend::relaxInstruction() unimplemented"); +} + +bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { + if (Count % 2) + return false; + + // MOV32 r0, r0 + while (Count >= 4) { + OS.write("\xc4\x00\x48\x20", 4); + Count -= 4; + } + // MOV16 r0, r0 + if (Count) + OS.write("\x6c\x03", 2); + + return true; +} + +MCAsmBackend *llvm::createCSKYAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { + return new CSKYAsmBackend(STI, Options); +} diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h new file mode 100644 index 0000000000000..b4cba4264e032 --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h @@ -0,0 +1,39 @@ +//===-- CSKYAsmBackend.h - CSKY Assembler Backend -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYASMBACKEND_H +#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYASMBACKEND_H + +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCTargetOptions.h" + +namespace llvm { + +class CSKYAsmBackend : public MCAsmBackend { + +public: + CSKYAsmBackend(const MCSubtargetInfo &STI, const MCTargetOptions &OP) + : MCAsmBackend(support::little) {} + + unsigned int getNumFixupKinds() const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override; + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override; + void relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const override; + bool writeNopData(raw_ostream &OS, uint64_t Count) const override; + std::unique_ptr + createObjectTargetWriter() const override; +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYASMBACKEND_H diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp new file mode 100644 index 0000000000000..1636326322908 --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp @@ -0,0 +1,45 @@ +//===-- CSKYELFObjectWriter.cpp - CSKY ELF Writer -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CSKYMCTargetDesc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCObjectWriter.h" + +#define DEBUG_TYPE "csky-elf-object-writer" + +using namespace llvm; + +namespace { + +class CSKYELFObjectWriter : public MCELFObjectTargetWriter { +public: + CSKYELFObjectWriter(uint8_t OSABI = 0) + : MCELFObjectTargetWriter(false, OSABI, ELF::EM_CSKY, true){}; + ~CSKYELFObjectWriter() {} + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; +}; + +} // namespace + +unsigned CSKYELFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + // Determine the type of the relocation. + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("invalid fixup kind!"); + } +} + +std::unique_ptr llvm::createCSKYELFObjectWriter() { + return std::make_unique(); +} diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp new file mode 100644 index 0000000000000..668247bbbd87f --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp @@ -0,0 +1,25 @@ +//===-- CSKYMCAsmInfo.cpp - CSKY Asm properties ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the CSKYMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "CSKYMCAsmInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +void CSKYMCAsmInfo::anchor() {} + +CSKYMCAsmInfo::CSKYMCAsmInfo(const Triple &TargetTriple) { + AlignmentIsInBytes = false; + SupportsDebugInformation = true; + CommentString = "#"; +} diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h new file mode 100644 index 0000000000000..3e0609f195311 --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h @@ -0,0 +1,29 @@ +//===-- CSKYMCAsmInfo.h - CSKY Asm Info ------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the CSKYMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCASMINFO_H +#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { +class Triple; + +class CSKYMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit CSKYMCAsmInfo(const Triple &TargetTriple); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCASMINFO_H diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp new file mode 100644 index 0000000000000..ed2b0e77b81af --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp @@ -0,0 +1,71 @@ +//===-- CSKYMCCodeEmitter.cpp - CSKY Code Emitter interface ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CSKYMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#include "CSKYMCCodeEmitter.h" +#include "MCTargetDesc/CSKYMCTargetDesc.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/EndianStream.h" + +using namespace llvm; + +#define DEBUG_TYPE "csky-mccode-emitter" + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); + +unsigned CSKYMCCodeEmitter::getOImmOpValue(const MCInst &MI, unsigned Idx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(Idx); + assert(MO.isImm() && "Unexpected MO type."); + return MO.getImm() - 1; +} + +void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCInstrDesc &Desc = MII.get(MI.getOpcode()); + unsigned Size = Desc.getSize(); + uint32_t Bin = getBinaryCodeForInstr(MI, Fixups, STI); + + uint16_t LO16 = static_cast(Bin); + uint16_t HI16 = static_cast(Bin >> 16); + + if (Size == 4) + support::endian::write(OS, HI16, support::little); + + support::endian::write(OS, LO16, support::little); + ++MCNumEmitted; // Keep track of the # of mi's emitted. +} + +unsigned +CSKYMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); + + if (MO.isImm()) + return static_cast(MO.getImm()); + + llvm_unreachable("Unhandled expression!"); + return 0; +} + +MCCodeEmitter *llvm::createCSKYMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new CSKYMCCodeEmitter(Ctx, MCII); +} + +#include "CSKYGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h new file mode 100644 index 0000000000000..c850a4bab7450 --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h @@ -0,0 +1,61 @@ +//===-- CSKYMCCodeEmitter.cpp - CSKY Code Emitter interface ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CSKYMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H +#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H + +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" + +namespace llvm { + +class CSKYMCCodeEmitter : public MCCodeEmitter { + MCContext &Ctx; + const MCInstrInfo &MII; + +public: + CSKYMCCodeEmitter(MCContext &Ctx, const MCInstrInfo &MII) + : Ctx(Ctx), MII(MII) {} + + ~CSKYMCCodeEmitter() {} + + void encodeInstruction(const MCInst &Inst, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + // Generated by tablegen. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // Default encoding method used by tablegen. + unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + template + unsigned getImmOpValue(const MCInst &MI, unsigned Idx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(Idx); + assert(MO.isImm() && "Unexpected MO type."); + return (MO.getImm() >> shift); + } + + unsigned getOImmOpValue(const MCInst &MI, unsigned Idx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp new file mode 100644 index 0000000000000..876000a370047 --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp @@ -0,0 +1,62 @@ +//===-- CSKYMCTargetDesc.cpp - CSKY Target Descriptions -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file provides CSKY specific target descriptions. +/// +//===----------------------------------------------------------------------===// + +#include "CSKYMCTargetDesc.h" +#include "CSKYAsmBackend.h" +#include "CSKYMCAsmInfo.h" +#include "CSKYMCCodeEmitter.h" +#include "TargetInfo/CSKYTargetInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "CSKYGenInstrInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "CSKYGenRegisterInfo.inc" + +using namespace llvm; + +static MCAsmInfo *createCSKYMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT, + const MCTargetOptions &Options) { + MCAsmInfo *MAI = new CSKYMCAsmInfo(TT); + + // Initial state of the frame pointer is SP. 
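+  // R14 is the C-SKY stack pointer register.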
+ unsigned Reg = MRI.getDwarfRegNum(CSKY::R14, true); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, Reg, 0); + MAI->addInitialFrameState(Inst); + return MAI; +} + +static MCInstrInfo *createCSKYMCInstrInfo() { + MCInstrInfo *Info = new MCInstrInfo(); + InitCSKYMCInstrInfo(Info); + return Info; +} + +static MCRegisterInfo *createCSKYMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *Info = new MCRegisterInfo(); + InitCSKYMCRegisterInfo(Info, CSKY::R15); + return Info; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTargetMC() { + auto &CSKYTarget = getTheCSKYTarget(); + TargetRegistry::RegisterMCAsmBackend(CSKYTarget, createCSKYAsmBackend); + TargetRegistry::RegisterMCAsmInfo(CSKYTarget, createCSKYMCAsmInfo); + TargetRegistry::RegisterMCInstrInfo(CSKYTarget, createCSKYMCInstrInfo); + TargetRegistry::RegisterMCRegInfo(CSKYTarget, createCSKYMCRegisterInfo); + TargetRegistry::RegisterMCCodeEmitter(CSKYTarget, createCSKYMCCodeEmitter); +} diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h new file mode 100644 index 0000000000000..da8a3b63a2f91 --- /dev/null +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h @@ -0,0 +1,48 @@ +//===-- CSKYMCTargetDesc.h - CSKY Target Descriptions -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides CSKY specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H +#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H + +#include "llvm/MC/MCTargetOptions.h" +#include + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCRegisterInfo; +class MCObjectTargetWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class Target; +class Triple; + +std::unique_ptr createCSKYELFObjectWriter(); + +MCAsmBackend *createCSKYAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); + +MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); +} // namespace llvm + +#define GET_REGINFO_ENUM +#include "CSKYGenRegisterInfo.inc" + +#define GET_INSTRINFO_ENUM +#include "CSKYGenInstrInfo.inc" + +#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H diff --git a/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp b/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp index 45a84703370e9..800b10517aa7d 100644 --- a/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp +++ b/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp @@ -18,8 +18,3 @@ Target &llvm::getTheCSKYTarget() { extern "C" void LLVMInitializeCSKYTargetInfo() { RegisterTarget X(getTheCSKYTarget(), "csky", "C-SKY", "CSKY"); } - -// FIXME: Temporary stub - this function must be defined for linking -// to succeed and will be called unconditionally by llc, so must be a no-op. -// Remove once this function is properly implemented. 
-extern "C" void LLVMInitializeCSKYTargetMC() {} From e8ade4569b7b5343ae8d4d7c9d83706eca0e8e90 Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Tue, 22 Dec 2020 13:09:22 +0800 Subject: [PATCH 061/378] [LegalizeType] When LegalizeType procedure widens a masked_gather, set MemoryType's EltNum equal to Result's EltNum When LegalizeType procedure widens a masked_gather, set MemoryType's EltNum equal to Result's EltNum. As I mentioned in https://reviews.llvm.org/D91092, in previous code, If we have a v17i32's masked_gather in avx512, we widen it to a v32i32's masked_gather with a v17i32's MemoryType. When the SplitVecRes_MGATHER process this v32i32's masked_gather, GetSplitDestVTs will assert fail since what you are going to split is v17i32. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D93610 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 19 +- .../X86/masked_gather_scatter_widen.ll | 297 ++++++++++++++++++ 2 files changed, 310 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f21ec1dbdfe5f..57cb364f1939a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4044,10 +4044,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Index = ModifyToType(Index, WideIndexVT); SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, Scale }; + + // Widen the MemoryType + EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(), + N->getMemoryVT().getScalarType(), NumElts); SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), - N->getMemoryVT(), dl, Ops, - N->getMemOperand(), N->getIndexType(), - N->getExtensionType()); + WideMemVT, dl, Ops, N->getMemOperand(), + N->getIndexType(), N->getExtensionType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -4881,6 +4884,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue Mask = MSC->getMask(); SDValue Index = MSC->getIndex(); SDValue Scale = MSC->getScale(); + EVT WideMemVT = MSC->getMemoryVT(); if (OpNo == 1) { DataOp = GetWidenedVector(DataOp); @@ -4897,6 +4901,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(), NumElts); Mask = ModifyToType(Mask, WideMaskVT, true); + + // Widen the MemoryType + WideMemVT = EVT::getVectorVT(*DAG.getContext(), + MSC->getMemoryVT().getScalarType(), NumElts); } else if (OpNo == 4) { // Just widen the index. It's allowed to have extra elements. 
Index = GetWidenedVector(Index); @@ -4905,9 +4913,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index, Scale}; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - MSC->getMemoryVT(), SDLoc(N), Ops, - MSC->getMemOperand(), MSC->getIndexType(), + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), + Ops, MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); } diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll index ab62c3b926926..517553d455ae7 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll @@ -247,6 +247,303 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32> ret void } +define void @test_mscatter_v17f32(float* %base, <17 x i32> %index, <17 x float> %val) +; WIDEN_SKX-LABEL: test_mscatter_v17f32: +; WIDEN_SKX: # %bb.0: +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0] +; WIDEN_SKX-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 +; WIDEN_SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; WIDEN_SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3] +; WIDEN_SKX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0] +; WIDEN_SKX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; WIDEN_SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; WIDEN_SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; WIDEN_SKX-NEXT: vmovd %esi, %xmm2 +; WIDEN_SKX-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vpinsrd $3, %r8d, %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vmovd %r9d, %xmm3 +; WIDEN_SKX-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; WIDEN_SKX-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; WIDEN_SKX-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; WIDEN_SKX-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; WIDEN_SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; WIDEN_SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: kxnorw %k0, %k0, %k1 +; WIDEN_SKX-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1} +; WIDEN_SKX-NEXT: movw $1, %ax +; WIDEN_SKX-NEXT: kmovw %eax, %k1 +; 
WIDEN_SKX-NEXT: vscatterdps %zmm2, (%rdi,%zmm3,4) {%k1} +; WIDEN_SKX-NEXT: vzeroupper +; WIDEN_SKX-NEXT: retq +; +; WIDEN_KNL-LABEL: test_mscatter_v17f32: +; WIDEN_KNL: # %bb.0: +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0] +; WIDEN_KNL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 +; WIDEN_KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; WIDEN_KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3] +; WIDEN_KNL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0] +; WIDEN_KNL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; WIDEN_KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; WIDEN_KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; WIDEN_KNL-NEXT: vmovd %esi, %xmm2 +; WIDEN_KNL-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vpinsrd $3, %r8d, %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vmovd %r9d, %xmm3 +; WIDEN_KNL-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; WIDEN_KNL-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; WIDEN_KNL-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; WIDEN_KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; WIDEN_KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; WIDEN_KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: kxnorw %k0, %k0, %k1 +; WIDEN_KNL-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1} +; WIDEN_KNL-NEXT: movw $1, %ax +; WIDEN_KNL-NEXT: kmovw %eax, %k1 +; WIDEN_KNL-NEXT: vscatterdps %zmm2, (%rdi,%zmm3,4) {%k1} +; WIDEN_KNL-NEXT: vzeroupper +; WIDEN_KNL-NEXT: retq +; +; WIDEN_AVX2-LABEL: test_mscatter_v17f32: +; WIDEN_AVX2: # %bb.0: +; WIDEN_AVX2-NEXT: vmovq %rdi, %xmm8 +; WIDEN_AVX2-NEXT: vpbroadcastq %xmm8, %ymm9 +; WIDEN_AVX2-NEXT: vmovd %esi, %xmm10 +; WIDEN_AVX2-NEXT: vpinsrd $1, %edx, %xmm10, %xmm10 +; WIDEN_AVX2-NEXT: vpinsrd $2, %ecx, %xmm10, %xmm10 +; WIDEN_AVX2-NEXT: vpinsrd $3, %r8d, %xmm10, %xmm10 +; WIDEN_AVX2-NEXT: vpmovsxdq %xmm10, %ymm10 +; WIDEN_AVX2-NEXT: vpsllq $2, %ymm10, %ymm10 +; WIDEN_AVX2-NEXT: vpaddq %ymm10, %ymm9, %ymm10 +; WIDEN_AVX2-NEXT: vmovq %xmm10, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax) +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm10, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax) +; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm10, %xmm0 +; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss 
%xmm2, (%rax) +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm3, (%rax) +; WIDEN_AVX2-NEXT: vmovd %r9d, %xmm0 +; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; WIDEN_AVX2-NEXT: vpsllq $2, %ymm0, %ymm0 +; WIDEN_AVX2-NEXT: vpaddq %ymm0, %ymm9, %ymm0 +; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm4, (%rax) +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm5, (%rax) +; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm6, (%rax) +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; WIDEN_AVX2-NEXT: vpsllq $2, %ymm0, %ymm0 +; WIDEN_AVX2-NEXT: vpaddq %ymm0, %ymm9, %ymm0 +; WIDEN_AVX2-NEXT: vmovss %xmm7, (%rax) +; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_AVX2-NEXT: vpmovsxdq %xmm1, %ymm1 +; WIDEN_AVX2-NEXT: vpsllq $2, %ymm1, %ymm1 +; WIDEN_AVX2-NEXT: vpaddq %ymm1, %ymm9, %ymm1 +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm2, (%rax) +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm2, (%rax) +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm2, (%rax) +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax) +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax) +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax) +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax) +; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax) +; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpmovsxdq %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpsllq $2, %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpaddq %xmm0, %xmm8, %xmm0 +; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax) +; WIDEN_AVX2-NEXT: vzeroupper +; WIDEN_AVX2-NEXT: retq +{ + %gep = getelementptr float, float* %base, <17 x i32> %index + call void @llvm.masked.scatter.v17f32.v17p0f32(<17 x float> %val, <17 x float*> %gep, i32 4, <17 x i1> ) + ret void +} + +define <17 x float> @test_mgather_v17f32(float* %base, <17 x i32> %index) +; 
WIDEN_SKX-LABEL: test_mgather_v17f32: +; WIDEN_SKX: # %bb.0: +; WIDEN_SKX-NEXT: movq %rdi, %rax +; WIDEN_SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_SKX-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_SKX-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; WIDEN_SKX-NEXT: vmovd %edx, %xmm1 +; WIDEN_SKX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vpinsrd $2, %r8d, %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vpinsrd $3, %r9d, %xmm1, %xmm1 +; WIDEN_SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; WIDEN_SKX-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; WIDEN_SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_SKX-NEXT: kxnorw %k0, %k0, %k1 +; WIDEN_SKX-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm2 {%k1} +; WIDEN_SKX-NEXT: movw $1, %cx +; WIDEN_SKX-NEXT: kmovw %ecx, %k1 +; WIDEN_SKX-NEXT: vgatherdps (%rsi,%zmm1,4), %zmm0 {%k1} +; WIDEN_SKX-NEXT: vmovss %xmm0, 64(%rdi) +; WIDEN_SKX-NEXT: vmovaps %zmm2, (%rdi) +; WIDEN_SKX-NEXT: vzeroupper +; WIDEN_SKX-NEXT: retq +; +; WIDEN_KNL-LABEL: test_mgather_v17f32: +; WIDEN_KNL: # %bb.0: +; WIDEN_KNL-NEXT: movq %rdi, %rax +; WIDEN_KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_KNL-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_KNL-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; WIDEN_KNL-NEXT: vmovd %edx, %xmm1 +; WIDEN_KNL-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vpinsrd $2, %r8d, %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vpinsrd $3, %r9d, %xmm1, %xmm1 +; WIDEN_KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; WIDEN_KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; WIDEN_KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; WIDEN_KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_KNL-NEXT: kxnorw %k0, %k0, %k1 +; WIDEN_KNL-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm2 {%k1} +; WIDEN_KNL-NEXT: movw $1, %cx +; WIDEN_KNL-NEXT: kmovw %ecx, %k1 +; WIDEN_KNL-NEXT: vgatherdps (%rsi,%zmm1,4), %zmm0 {%k1} +; WIDEN_KNL-NEXT: vmovss %xmm0, 64(%rdi) +; WIDEN_KNL-NEXT: vmovaps %zmm2, (%rdi) +; WIDEN_KNL-NEXT: vzeroupper +; WIDEN_KNL-NEXT: retq +; +; WIDEN_AVX2-LABEL: test_mgather_v17f32: +; WIDEN_AVX2: # %bb.0: +; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), 
%xmm0, %xmm0 +; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_AVX2-NEXT: movq %rdi, %rax +; WIDEN_AVX2-NEXT: vmovd %edx, %xmm2 +; WIDEN_AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; WIDEN_AVX2-NEXT: vpinsrd $2, %r8d, %xmm2, %xmm2 +; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2 +; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm4, %xmm4 +; WIDEN_AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm4, %xmm1 +; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; WIDEN_AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; WIDEN_AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; WIDEN_AVX2-NEXT: vgatherdps %ymm2, (%rsi,%ymm1,4), %ymm4 +; WIDEN_AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; WIDEN_AVX2-NEXT: vgatherdps %ymm1, (%rsi,%ymm0,4), %ymm2 +; WIDEN_AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,0,0,0] +; WIDEN_AVX2-NEXT: vgatherdps %ymm0, (%rsi,%ymm3,4), %ymm1 +; WIDEN_AVX2-NEXT: vmovss %xmm1, 64(%rdi) +; WIDEN_AVX2-NEXT: vmovaps %ymm2, 32(%rdi) +; WIDEN_AVX2-NEXT: vmovaps %ymm4, (%rdi) +; WIDEN_AVX2-NEXT: vzeroupper +; WIDEN_AVX2-NEXT: retq +{ + %gep = getelementptr float, float* %base, <17 x i32> %index + %res = call <17 x float> @llvm.masked.gather.v17f32.v17p0f32(<17 x float*> %gep, i32 4, <17 x i1> , <17 x float> undef) + ret <17 x float> %res +} + +declare <17 x float> @llvm.masked.gather.v17f32.v17p0f32(<17 x float*>, i32 immarg, <17 x i1>, <17 x float>) +declare void @llvm.masked.scatter.v17f32.v17p0f32(<17 x float> , <17 x float*> , i32 , <17 x i1>) + declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>) declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>) declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) From 7a2c8be641ded68b3424b46dbf47f2879a9eaa2e Mon Sep 17 00:00:00 2001 From: Zakk Chen Date: Fri, 18 Dec 2020 00:14:53 -0800 Subject: [PATCH 062/378] [RISCV] Define vleff intrinsics. Define vleff intrinsics and lower to V instructions. We work with @rogfer01 from BSC to come out this patch. 
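For readers unfamiliar with the "ff" suffix: a fault-only-first load may trap
only on element 0; if a later element would fault, the hardware instead
truncates vl to the number of elements that were loaded successfully. That is
why the pseudos below are created under "hasSideEffects = 1, Defs = [VL]".
As a minimal illustrative use of the new intrinsic at the IR level (not taken
from the patch; the function name is invented and the element type mirrors one
of the signatures the new tests exercise):

  ; Expected to select to a vsetvli (e32,m1) plus vle32ff.v on rv32 with
  ; +experimental-v, matching what the tests below check.
  declare <vscale x 2 x i32> @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>*, i32)

  define <vscale x 2 x i32> @first_fault_load(<vscale x 2 x i32>* %p, i32 %avl) {
  entry:
    %v = call <vscale x 2 x i32> @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>* %p, i32 %avl)
    ret <vscale x 2 x i32> %v
  }
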
Authored-by: Roger Ferrer Ibanez Co-Authored-by: Zakk Chen Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D93516 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 1 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 13 + llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll | 1045 +++++++++++++ llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll | 1333 +++++++++++++++++ 4 files changed, 2392 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index dc1d56322191a..d3ccd2eaf186e 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -378,6 +378,7 @@ let TargetPrefix = "riscv" in { } defm vle : RISCVUSLoad; + defm vleff : RISCVUSLoad; defm vse : RISCVUSStore; defm vlse: RISCVSLoad; defm vsse: RISCVSStore; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index a5c5c04542e14..68c656a049ae8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1599,6 +1599,16 @@ foreach eew = EEWList in { defm PseudoVSUXEI # eew : VPseudoIStore; } +//===----------------------------------------------------------------------===// +// 7.7. Unit-stride Fault-Only-First Loads +//===----------------------------------------------------------------------===// + +// vleff may update VL register +let hasSideEffects = 1, Defs = [VL] in +foreach eew = EEWList in { + defm PseudoVLE # eew # FF : VPseudoUSLoad; +} + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// @@ -1866,6 +1876,9 @@ foreach vti = AllVectors in defm : VPatUSLoad<"int_riscv_vle", "PseudoVLE" # vti.SEW, vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; + defm : VPatUSLoad<"int_riscv_vleff", + "PseudoVLE" # vti.SEW # "FF", + vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; defm : VPatUSStore<"int_riscv_vse", "PseudoVSE" # vti.SEW, vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll new file mode 100644 index 0000000000000..ea882a5bf5876 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll @@ -0,0 +1,1045 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh,+f,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vleff.nxv1i32( + *, + i32); + +define @intrinsic_vleff_v_nxv1i32_nxv1i32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1i32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1i32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv1i32_nxv1i32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1i32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2i32( + *, + i32); + +define @intrinsic_vleff_v_nxv2i32_nxv2i32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2i32_nxv2i32 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2i32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2i32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv2i32_nxv2i32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2i32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4i32( + *, + i32); + +define @intrinsic_vleff_v_nxv4i32_nxv4i32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4i32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4i32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv4i32_nxv4i32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4i32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8i32( + *, + i32); + +define @intrinsic_vleff_v_nxv8i32_nxv8i32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8i32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8i32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv8i32_nxv8i32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8i32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16i32( + *, + i32); + +define @intrinsic_vleff_v_nxv16i32_nxv16i32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16i32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16i32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv16i32_nxv16i32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16i32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1f32( + *, + i32); + +define @intrinsic_vleff_v_nxv1f32_nxv1f32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1f32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1f32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv1f32_nxv1f32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1f32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2f32( + *, + i32); + 
+define @intrinsic_vleff_v_nxv2f32_nxv2f32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2f32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2f32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv2f32_nxv2f32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2f32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4f32( + *, + i32); + +define @intrinsic_vleff_v_nxv4f32_nxv4f32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4f32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4f32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv4f32_nxv4f32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4f32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8f32( + *, + i32); + +define @intrinsic_vleff_v_nxv8f32_nxv8f32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8f32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8f32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv8f32_nxv8f32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8f32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16f32( + *, + i32); + +define @intrinsic_vleff_v_nxv16f32_nxv16f32(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16f32( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16f32( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv16f32_nxv16f32( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16f32( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1i16( + *, + i32); + +define @intrinsic_vleff_v_nxv1i16_nxv1i16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1i16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1i16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv1i16_nxv1i16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = 
call @llvm.riscv.vleff.mask.nxv1i16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2i16( + *, + i32); + +define @intrinsic_vleff_v_nxv2i16_nxv2i16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2i16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2i16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv2i16_nxv2i16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2i16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4i16( + *, + i32); + +define @intrinsic_vleff_v_nxv4i16_nxv4i16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4i16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4i16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv4i16_nxv4i16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4i16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8i16( + *, + i32); + +define @intrinsic_vleff_v_nxv8i16_nxv8i16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8i16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8i16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv8i16_nxv8i16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8i16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16i16( + *, + i32); + +define @intrinsic_vleff_v_nxv16i16_nxv16i16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16i16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16i16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv16i16_nxv16i16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16i16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv32i16( + *, + i32); + +define @intrinsic_vleff_v_nxv32i16_nxv32i16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv32i16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv32i16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv32i16_nxv32i16( %0, * %1, %2, i32 %3) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv32i16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1f16( + *, + i32); + +define @intrinsic_vleff_v_nxv1f16_nxv1f16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1f16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1f16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv1f16_nxv1f16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1f16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2f16( + *, + i32); + +define @intrinsic_vleff_v_nxv2f16_nxv2f16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2f16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2f16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv2f16_nxv2f16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2f16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4f16( + *, + i32); + +define @intrinsic_vleff_v_nxv4f16_nxv4f16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4f16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4f16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv4f16_nxv4f16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4f16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8f16( + *, + i32); + +define @intrinsic_vleff_v_nxv8f16_nxv8f16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8f16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8f16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv8f16_nxv8f16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8f16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16f16( + *, + i32); + +define @intrinsic_vleff_v_nxv16f16_nxv16f16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16f16( + * %0, + i32 %1) + + ret %a +} + +declare 
@llvm.riscv.vleff.mask.nxv16f16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv16f16_nxv16f16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16f16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv32f16( + *, + i32); + +define @intrinsic_vleff_v_nxv32f16_nxv32f16(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv32f16( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv32f16( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv32f16_nxv32f16( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv32f16( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1i8( + *, + i32); + +define @intrinsic_vleff_v_nxv1i8_nxv1i8(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1i8( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1i8( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv1i8_nxv1i8( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1i8( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2i8( + *, + i32); + +define @intrinsic_vleff_v_nxv2i8_nxv2i8(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2i8( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2i8( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv2i8_nxv2i8( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2i8( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4i8( + *, + i32); + +define @intrinsic_vleff_v_nxv4i8_nxv4i8(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4i8( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4i8( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv4i8_nxv4i8( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4i8( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8i8( + *, + i32); + +define @intrinsic_vleff_v_nxv8i8_nxv8i8(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vle8ff.v 
{{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8i8( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8i8( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv8i8_nxv8i8( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8i8( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16i8( + *, + i32); + +define @intrinsic_vleff_v_nxv16i8_nxv16i8(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16i8( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16i8( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv16i8_nxv16i8( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16i8( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv32i8( + *, + i32); + +define @intrinsic_vleff_v_nxv32i8_nxv32i8(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv32i8( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv32i8( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv32i8_nxv32i8( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv32i8( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv64i8( + *, + i32); + +define @intrinsic_vleff_v_nxv64i8_nxv64i8(* %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv64i8( + * %0, + i32 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv64i8( + , + *, + , + i32); + +define @intrinsic_vleff_mask_v_nxv64i8_nxv64i8( %0, * %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv64i8( + %0, + * %1, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll new file mode 100644 index 0000000000000..560221c2536b1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll @@ -0,0 +1,1333 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh,+f,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vleff.nxv1i64( + *, + i64); + +define @intrinsic_vleff_v_nxv1i64_nxv1i64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1i64( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1i64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, * %1, %2, i64 %3) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1i64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2i64( + *, + i64); + +define @intrinsic_vleff_v_nxv2i64_nxv2i64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2i64( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2i64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv2i64_nxv2i64( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2i64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4i64( + *, + i64); + +define @intrinsic_vleff_v_nxv4i64_nxv4i64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4i64( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4i64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv4i64_nxv4i64( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4i64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8i64( + *, + i64); + +define @intrinsic_vleff_v_nxv8i64_nxv8i64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8i64( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8i64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv8i64_nxv8i64( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8i64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1f64( + *, + i64); + +define @intrinsic_vleff_v_nxv1f64_nxv1f64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1f64( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1f64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv1f64_nxv1f64( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1f64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2f64( + *, + i64); + +define @intrinsic_vleff_v_nxv2f64_nxv2f64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2f64( + * %0, + i64 %1) + + ret %a +} + +declare 
@llvm.riscv.vleff.mask.nxv2f64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv2f64_nxv2f64( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2f64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4f64( + *, + i64); + +define @intrinsic_vleff_v_nxv4f64_nxv4f64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4f64( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4f64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv4f64_nxv4f64( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4f64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8f64( + *, + i64); + +define @intrinsic_vleff_v_nxv8f64_nxv8f64(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8f64( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8f64( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv8f64_nxv8f64( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vle64ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8f64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1i32( + *, + i64); + +define @intrinsic_vleff_v_nxv1i32_nxv1i32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1i32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1i32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv1i32_nxv1i32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1i32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2i32( + *, + i64); + +define @intrinsic_vleff_v_nxv2i32_nxv2i32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2i32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2i32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv2i32_nxv2i32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2i32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4i32( + *, + i64); + +define @intrinsic_vleff_v_nxv4i32_nxv4i32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4i32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4i32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv4i32_nxv4i32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4i32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8i32( + *, + i64); + +define @intrinsic_vleff_v_nxv8i32_nxv8i32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8i32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8i32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv8i32_nxv8i32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8i32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16i32( + *, + i64); + +define @intrinsic_vleff_v_nxv16i32_nxv16i32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16i32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16i32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv16i32_nxv16i32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16i32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1f32( + *, + i64); + +define @intrinsic_vleff_v_nxv1f32_nxv1f32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1f32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1f32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv1f32_nxv1f32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1f32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2f32( + *, + i64); + +define @intrinsic_vleff_v_nxv2f32_nxv2f32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2f32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2f32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv2f32_nxv2f32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2f32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4f32( + *, + i64); + +define 
@intrinsic_vleff_v_nxv4f32_nxv4f32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4f32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4f32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv4f32_nxv4f32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4f32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8f32( + *, + i64); + +define @intrinsic_vleff_v_nxv8f32_nxv8f32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8f32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8f32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv8f32_nxv8f32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8f32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16f32( + *, + i64); + +define @intrinsic_vleff_v_nxv16f32_nxv16f32(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16f32( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16f32( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv16f32_nxv16f32( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vle32ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16f32( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1i16( + *, + i64); + +define @intrinsic_vleff_v_nxv1i16_nxv1i16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1i16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1i16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv1i16_nxv1i16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1i16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2i16( + *, + i64); + +define @intrinsic_vleff_v_nxv2i16_nxv2i16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2i16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2i16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv2i16_nxv2i16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call 
@llvm.riscv.vleff.mask.nxv2i16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4i16( + *, + i64); + +define @intrinsic_vleff_v_nxv4i16_nxv4i16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4i16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4i16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv4i16_nxv4i16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4i16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8i16( + *, + i64); + +define @intrinsic_vleff_v_nxv8i16_nxv8i16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8i16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8i16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv8i16_nxv8i16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8i16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16i16( + *, + i64); + +define @intrinsic_vleff_v_nxv16i16_nxv16i16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16i16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16i16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv16i16_nxv16i16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16i16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv32i16( + *, + i64); + +define @intrinsic_vleff_v_nxv32i16_nxv32i16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv32i16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv32i16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv32i16_nxv32i16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv32i16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1f16( + *, + i64); + +define @intrinsic_vleff_v_nxv1f16_nxv1f16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1f16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1f16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv1f16_nxv1f16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vleff_mask_v_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1f16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2f16( + *, + i64); + +define @intrinsic_vleff_v_nxv2f16_nxv2f16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2f16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2f16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv2f16_nxv2f16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2f16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4f16( + *, + i64); + +define @intrinsic_vleff_v_nxv4f16_nxv4f16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4f16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4f16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv4f16_nxv4f16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4f16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8f16( + *, + i64); + +define @intrinsic_vleff_v_nxv8f16_nxv8f16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8f16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8f16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv8f16_nxv8f16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8f16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16f16( + *, + i64); + +define @intrinsic_vleff_v_nxv16f16_nxv16f16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv16f16( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16f16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv16f16_nxv16f16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16f16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv32f16( + *, + i64); + +define @intrinsic_vleff_v_nxv32f16_nxv32f16(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv32f16( + * %0, + i64 %1) + + ret %a +} + +declare 
@llvm.riscv.vleff.mask.nxv32f16( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv32f16_nxv32f16( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vle16ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv32f16( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv1i8( + *, + i64); + +define @intrinsic_vleff_v_nxv1i8_nxv1i8(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv1i8( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv1i8( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv1i8_nxv1i8( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv1i8( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv2i8( + *, + i64); + +define @intrinsic_vleff_v_nxv2i8_nxv2i8(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv2i8( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv2i8( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv2i8_nxv2i8( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv2i8( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv4i8( + *, + i64); + +define @intrinsic_vleff_v_nxv4i8_nxv4i8(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv4i8( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv4i8( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv4i8_nxv4i8( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv4i8( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv8i8( + *, + i64); + +define @intrinsic_vleff_v_nxv8i8_nxv8i8(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv8i8( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv8i8( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv8i8_nxv8i8( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv8i8( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv16i8( + *, + i64); + +define @intrinsic_vleff_v_nxv16i8_nxv16i8(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call 
@llvm.riscv.vleff.nxv16i8( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv16i8( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv16i8_nxv16i8( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv16i8( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv32i8( + *, + i64); + +define @intrinsic_vleff_v_nxv32i8_nxv32i8(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv32i8( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv32i8( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv32i8_nxv32i8( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv32i8( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vleff.nxv64i8( + *, + i64); + +define @intrinsic_vleff_v_nxv64i8_nxv64i8(* %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_v_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0) + %a = call @llvm.riscv.vleff.nxv64i8( + * %0, + i64 %1) + + ret %a +} + +declare @llvm.riscv.vleff.mask.nxv64i8( + , + *, + , + i64); + +define @intrinsic_vleff_mask_v_nxv64i8_nxv64i8( %0, * %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vleff_mask_v_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu +; CHECK: vle8ff.v {{v[0-9]+}}, (a0), v0.t + %a = call @llvm.riscv.vleff.mask.nxv64i8( + %0, + * %1, + %2, + i64 %3) + + ret %a +} From 9a8ef927df38a933909a512bf0089a28387ad876 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Wed, 16 Dec 2020 07:06:07 +0800 Subject: [PATCH 063/378] [RISCV] Define vector compare intrinsics. Define vector compare intrinsics and lower them to V instructions. We work with @rogfer01 from BSC to come out this patch. 
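For example, the unmasked form of a compare takes the two source operands plus
the vector length and returns a mask vector. A minimal sketch of the intended
IR and lowering follows; the element types and the concrete registers shown
here are illustrative only, chosen to match the nxv1f16 test case rather than
taken as normative:

  ; mask = (va == vb), computed under the active vector length %vl
  %m = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
              <vscale x 1 x half> %va,
              <vscale x 1 x half> %vb,
              i32 %vl)

which is expected to select to something along the lines of:

  vsetvli t0, a0, e16,mf4,ta,mu
  vmfeq.vv v0, v8, v9

The masked variants additionally take a maskedoff operand and a mask operand
(see the RISCVCompareMask class below) and select to the ", v0.t" forms of the
same instructions.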
Authored-by: Roger Ferrer Ibanez Co-Authored-by: Hsiangkai Wang Differential Revision: https://reviews.llvm.org/D93368 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 47 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 131 ++ llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll | 757 +++++++ llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll | 1009 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll | 361 +++ llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll | 481 ++++ llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll | 361 +++ llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll | 481 ++++ llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll | 757 +++++++ llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll | 1009 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll | 757 +++++++ llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll | 1009 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll | 757 +++++++ llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll | 1009 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll | 1681 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll | 2017 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll | 1021 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll | 1225 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll | 1021 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll | 1225 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll | 1681 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll | 2017 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll | 1681 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll | 2017 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll | 1261 +++++++++++ llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll | 1513 +++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll | 1261 +++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll | 1513 +++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll | 1681 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll | 2017 +++++++++++++++++ 30 files changed, 33758 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll create mode 
100644 llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index d3ccd2eaf186e..efb91c0ab0596 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -254,6 +254,26 @@ let TargetPrefix = "riscv" in { [IntrNoMem]>, RISCVVIntrinsic { let ExtendOperand = 2; } + // For binary operations with mask type output without mask. + // Output: (mask type output) + // Input: (vector_in, vector_in/scalar_in, vl) + class RISCVCompareNoMask + : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For binary operations with mask type output with mask. + // Output: (mask type output) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + class RISCVCompareMask + : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 3; + } // For Saturating binary operations. // The destination vector type is the same as first source vector. @@ -339,14 +359,20 @@ let TargetPrefix = "riscv" in { def "int_riscv_" # NAME : RISCVSStore; def "int_riscv_" # NAME # "_mask" : RISCVSStoreMask; } + multiclass RISCVIStore { def "int_riscv_" # NAME : RISCVIStore; def "int_riscv_" # NAME # "_mask" : RISCVIStoreMask; } + + // AAX means the destination type(A) is the same as the first source + // type(A). X means any type for the second source operand. multiclass RISCVBinaryAAX { def "int_riscv_" # NAME : RISCVBinaryAAXNoMask; def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMask; } + // ABX means the destination type(A) is different from the first source + // type(B). X means any type for the second source operand. 
multiclass RISCVBinaryABX { def "int_riscv_" # NAME : RISCVBinaryABXNoMask; def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMask; @@ -376,6 +402,10 @@ let TargetPrefix = "riscv" in { def "int_riscv_" # NAME : RISCVTernaryAAXANoMask; def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMask; } + multiclass RISCVCompare { + def "int_riscv_" # NAME : RISCVCompareNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVCompareMask; + } defm vle : RISCVUSLoad; defm vleff : RISCVUSLoad; @@ -418,6 +448,15 @@ let TargetPrefix = "riscv" in { defm vnsrl : RISCVBinaryABX; defm vnsra : RISCVBinaryABX; + defm vmseq : RISCVCompare; + defm vmsne : RISCVCompare; + defm vmsltu : RISCVCompare; + defm vmslt : RISCVCompare; + defm vmsleu : RISCVCompare; + defm vmsle : RISCVCompare; + defm vmsgtu : RISCVCompare; + defm vmsgt : RISCVCompare; + defm vminu : RISCVBinaryAAX; defm vmin : RISCVBinaryAAX; defm vmaxu : RISCVBinaryAAX; @@ -508,4 +547,12 @@ let TargetPrefix = "riscv" in { defm vnclipu : RISCVSaturatingBinaryABX; defm vnclip : RISCVSaturatingBinaryABX; + + defm vmfeq : RISCVCompare; + defm vmfne : RISCVCompare; + defm vmflt : RISCVCompare; + defm vmfle : RISCVCompare; + defm vmfgt : RISCVCompare; + defm vmfge : RISCVCompare; + } // TargetPrefix = "riscv" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 68c656a049ae8..67bdfa80e8c4f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -778,6 +778,27 @@ multiclass VPseudoUnaryV_V_X_I_NoDummyMask { } } +// The destination EEW is 1. +// The source EEW is 8, 16, 32, or 64. +// When the destination EEW is different from source EEW, we need to use +// @earlyclobber to avoid the overlap between destination and source registers. +multiclass VPseudoBinaryM_VV { + foreach m = MxList.m in + defm _VV : VPseudoBinary; +} + +multiclass VPseudoBinaryM_VX { + foreach m = MxList.m in + defm !if(!eq(IsFloat, 0), "_VX", "_VF") : + VPseudoBinary; +} + +multiclass VPseudoBinaryM_VI { + foreach m = MxList.m in + defm _VI : VPseudoBinary; +} + multiclass VPseudoBinaryV_VV_VX_VI { defm "" : VPseudoBinaryV_VV; defm "" : VPseudoBinaryV_VX; @@ -884,6 +905,22 @@ multiclass VPseudoTernaryV_VX_VI; } +multiclass VPseudoBinaryM_VV_VX_VI { + defm "" : VPseudoBinaryM_VV; + defm "" : VPseudoBinaryM_VX; + defm "" : VPseudoBinaryM_VI; +} + +multiclass VPseudoBinaryM_VV_VX { + defm "" : VPseudoBinaryM_VV; + defm "" : VPseudoBinaryM_VX; +} + +multiclass VPseudoBinaryM_VX_VI { + defm "" : VPseudoBinaryM_VX; + defm "" : VPseudoBinaryM_VI; +} + //===----------------------------------------------------------------------===// // Helpers to define the SDNode patterns. 
//===----------------------------------------------------------------------===// @@ -1387,6 +1424,34 @@ multiclass VPatBinaryV_I { vti.RegClass, simm5>; } +multiclass VPatBinaryM_VV vtilist> { + foreach vti = vtilist in + defm : VPatBinary; +} + +multiclass VPatBinaryM_VX vtilist> { + foreach vti = vtilist in + defm : VPatBinary; +} + +multiclass VPatBinaryM_VI vtilist> { + foreach vti = vtilist in + defm : VPatBinary; +} + multiclass VPatBinaryV_VV_VX_VI vtilist, Operand ImmType = simm5> { @@ -1538,6 +1603,28 @@ multiclass VPatTernaryV_VX_VI; } +multiclass VPatBinaryM_VV_VX_VI vtilist> +{ + defm "" : VPatBinaryM_VV; + defm "" : VPatBinaryM_VX; + defm "" : VPatBinaryM_VI; +} + +multiclass VPatBinaryM_VV_VX vtilist> +{ + defm "" : VPatBinaryM_VV; + defm "" : VPatBinaryM_VX; +} + +multiclass VPatBinaryM_VX_VI vtilist> +{ + defm "" : VPatBinaryM_VX; + defm "" : VPatBinaryM_VI; +} + //===----------------------------------------------------------------------===// // Pseudo instructions and patterns. //===----------------------------------------------------------------------===// @@ -1667,6 +1754,18 @@ defm PseudoVSRA : VPseudoBinaryV_VV_VX_VI; defm PseudoVNSRL : VPseudoBinaryV_WV_WX_WI; defm PseudoVNSRA : VPseudoBinaryV_WV_WX_WI; +//===----------------------------------------------------------------------===// +// 12.8. Vector Integer Comparison Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMSEQ : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSNE : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSLTU : VPseudoBinaryM_VV_VX; +defm PseudoVMSLT : VPseudoBinaryM_VV_VX; +defm PseudoVMSLEU : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSLE : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSGTU : VPseudoBinaryM_VX_VI; +defm PseudoVMSGT : VPseudoBinaryM_VX_VI; + //===----------------------------------------------------------------------===// // 12.9. Vector Integer Min/Max Instructions //===----------------------------------------------------------------------===// @@ -1792,6 +1891,15 @@ defm PseudoVFSGNJ : VPseudoBinaryV_VV_VX; defm PseudoVFSGNJN : VPseudoBinaryV_VV_VX; defm PseudoVFSGNJX : VPseudoBinaryV_VV_VX; +//===----------------------------------------------------------------------===// +// 14.13. Vector Floating-Point Compare Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMFEQ : VPseudoBinaryM_VV_VX; +defm PseudoVMFNE : VPseudoBinaryM_VV_VX; +defm PseudoVMFLT : VPseudoBinaryM_VV_VX; +defm PseudoVMFLE : VPseudoBinaryM_VV_VX; +defm PseudoVMFGT : VPseudoBinaryM_VX; +defm PseudoVMFGE : VPseudoBinaryM_VX; } // Predicates = [HasStdExtV, HasStdExtF] //===----------------------------------------------------------------------===// @@ -2004,6 +2112,19 @@ defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnsrl", "PseudoVNSRL", AllWidenableIntVectors>; defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnsra", "PseudoVNSRA", AllWidenableIntVectors>; +//===----------------------------------------------------------------------===// +// 12.8. 
Vector Integer Comparison Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmseq", "PseudoVMSEQ", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsne", "PseudoVMSNE", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmsltu", "PseudoVMSLTU", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmslt", "PseudoVMSLT", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsleu", "PseudoVMSLEU", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsle", "PseudoVMSLE", AllIntegerVectors>; + +defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgtu", "PseudoVMSGTU", AllIntegerVectors>; +defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>; + //===----------------------------------------------------------------------===// // 12.9. Vector Integer Min/Max Instructions //===----------------------------------------------------------------------===// @@ -2134,6 +2255,16 @@ defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors> defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors>; defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors>; +//===----------------------------------------------------------------------===// +// 14.13. Vector Floating-Point Compare Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ", AllFloatVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE", AllFloatVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT", AllFloatVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE", AllFloatVectors>; +defm "" : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT", AllFloatVectors>; +defm "" : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE", AllFloatVectors>; + } // Predicates = [HasStdExtV, HasStdExtF] //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll new file mode 100644 index 0000000000000..256263bae0bb2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll @@ -0,0 +1,757 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfeq.nxv1f16( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv1f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv1f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f16( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2f16_nxv2f16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv2f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv2f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f16( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv4f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv4f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv8f16( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv8f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv8f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv16f16( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv16f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv16f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f32( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, 
{{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv1f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f32( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv2f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f32( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv4f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv8f32( + , + , + i32); + +define @intrinsic_vmfeq_vv_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv8f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv8f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vmfeq_vf_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a 
+} + +declare @llvm.riscv.vmfeq.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vmfeq_vf_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vmfeq_vf_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vmfeq_vf_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vmfeq_vf_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vmfeq_vf_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vmfeq_vf_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vmfeq_vf_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vmfeq_vf_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfeq_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
new file mode 100644
index 0000000000000..577fa6d2ce198
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
@@ -0,0 +1,1009 @@
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  i64);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1f16_nxv1f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu
+; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    i64 %2)
+
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f16(
+  <vscale x 1 x i1>,
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu
+; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %mask = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
+    <vscale x 1 x half> %1,
+    <vscale x 1 x half> %2,
+    i64 %4)
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f16(
+    <vscale x 1 x i1> %0,
+    <vscale x 1 x half> %2,
+    <vscale x 1 x half> %3,
+    <vscale x 1 x i1> %mask,
+    i64 %4)
+
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  i64);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2f16_nxv2f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu
+; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    i64 %2)
+
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f16(
+  <vscale x 2 x i1>,
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu
+; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16(
+    <vscale x 2 x half> %1,
+    <vscale x 2 x half> %2,
+    i64 %4)
+  %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f16(
+    <vscale x 2 x i1> %0,
+    <vscale x 2 x half> %2,
+    <vscale x 2 x half> %3,
+    <vscale x 2 x i1> %mask,
+    i64 %4)
+
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  i64);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4f16_nxv4f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu
+; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    i64 %2)
+
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f16(
+  <vscale x 4 x i1>,
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu
+; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16(
+    <vscale x 4 x half> %1,
+    <vscale x 4 x half> %2,
+    i64 %4)
+  %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f16(
+    <vscale x 4 x i1> %0,
+    <vscale x 4 x half> %2,
+    <vscale x 4 x half> %3,
+    <vscale x 4 x i1> %mask,
+    i64 %4)
+
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  i64);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8f16_nxv8f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu
+; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    i64 %2)
+
+  ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16(
+  <vscale x 8 x i1>,
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16
+;
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv8f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv16f16( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv16f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv16f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f32( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv1f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f32( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv2f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f32( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, 
{{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv4f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv8f32( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv8f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv8f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f64( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv1f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv1f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f64( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv2f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfeq.mask.nxv2f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f64( + , + , + i64); + +define @intrinsic_vmfeq_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfeq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfeq.nxv4f64( + %1, + %2, + i64 %4) + %a = 
call @llvm.riscv.vmfeq.mask.nxv4f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vmfeq_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vmfeq_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vmfeq_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vmfeq_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vmfeq_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmfeq_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vmfeq_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vmfeq_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vmfeq_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vmfeq_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv8f32.f32( + %0, + float %1, + i64 
%2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vmfeq_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vmfeq_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfeq.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vmfeq_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfeq.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfeq.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfeq_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfeq.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfeq.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll new file mode 100644 index 0000000000000..42189c52c2285 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll @@ -0,0 +1,361 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfge.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vmfge_vf_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f16_f16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vmfge_vf_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vmfge_vf_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vmfge_vf_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vmfge_vf_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv16f16.f16( + , + , + 
half, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vmfge_vf_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vmfge_vf_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vmfge_vf_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vmfge_vf_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfge_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; 
CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll new file mode 100644 index 0000000000000..8d6af219ce133 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll @@ -0,0 +1,481 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfge.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vmfge_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vmfge_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vmfge_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vmfge_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, 
i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vmfge_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vmfge_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vmfge_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vmfge_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vmfge.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vmfge_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vmfge_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vmfge_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfge.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vmfge_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfge.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfge.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfge_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfge.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfge.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll new file mode 
100644
index 0000000000000..724d0e8afc0f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
@@ -0,0 +1,361 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
+  <vscale x 1 x half>,
+  half,
+  i32);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
+    <vscale x 1 x half> %0,
+    half %1,
+    i32 %2)
+
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f16.f16(
+  <vscale x 1 x i1>,
+  <vscale x 1 x half>,
+  half,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f16_f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f16.f16(
+    <vscale x 1 x i1> %0,
+    <vscale x 1 x half> %1,
+    half %2,
+    <vscale x 1 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
+  <vscale x 2 x half>,
+  half,
+  i32);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}
+  %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
+    <vscale x 2 x half> %0,
+    half %1,
+    i32 %2)
+
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f16.f16(
+  <vscale x 2 x i1>,
+  <vscale x 2 x half>,
+  half,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f16_f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t
+  %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f16.f16(
+    <vscale x 2 x i1> %0,
+    <vscale x 2 x half> %1,
+    half %2,
+    <vscale x 2 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
+  <vscale x 4 x half>,
+  half,
+  i32);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}
+  %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
+    <vscale x 4 x half> %0,
+    half %1,
+    i32 %2)
+
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f16.f16(
+  <vscale x 4 x i1>,
+  <vscale x 4 x half>,
+  half,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f16_f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t
+  %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f16.f16(
+    <vscale x 4 x i1> %0,
+    <vscale x 4 x half> %1,
+    half %2,
+    <vscale x 4 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
+  <vscale x 8 x half>,
+  half,
+  i32);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}
+  %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
+    <vscale x 8 x half> %0,
+    half %1,
+    i32 %2)
+
+  ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16(
+  <vscale x 8 x i1>,
+  <vscale x 8 x half>,
+  half,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu
+; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t
+  %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16(
+    <vscale x 8 x i1> %0,
+    <vscale x 8 x half> %1,
+    half %2,
+    <vscale x 8 x i1> %3,
+    i32
%4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vmfgt_vf_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfgt_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vmfgt_vf_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfgt_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vmfgt_vf_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfgt_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vmfgt_vf_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfgt_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vmfgt_vf_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f32_f32 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfgt_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll new file mode 100644 index 0000000000000..6145ac78ae073 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll @@ -0,0 +1,481 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfgt.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vmfgt_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vmfgt_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vmfgt_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv8f16.f16( + , + half, + i64); + 
+define @intrinsic_vmfgt_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vmfgt_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vmfgt_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vmfgt_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vmfgt_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, 
{{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vmfgt_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vmfgt_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vmfgt_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfgt_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfgt.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vmfgt_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfgt.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfgt.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define 
@intrinsic_vmfgt_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfgt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfgt.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll new file mode 100644 index 0000000000000..870c1fdd7bc54 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll @@ -0,0 +1,757 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfle.nxv1f16( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv1f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv1f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f16( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv2f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv2f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f16( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv4f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv4f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f16( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e16,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv8f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv16f16( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv16f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv16f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f32( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv1f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f32( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv2f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f32( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call 
@llvm.riscv.vmfle.nxv4f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv4f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f32( + , + , + i32); + +define @intrinsic_vmfle_vv_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv8f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfle.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vmfle_vf_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vmfle_vf_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vmfle_vf_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define 
@intrinsic_vmfle_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vmfle_vf_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vmfle_vf_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vmfle_vf_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vmfle_vf_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, 
{{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vmfle_vf_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vmfle_vf_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfle_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll new file mode 100644 index 0000000000000..5d5f3cc4de9a8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll @@ -0,0 +1,1009 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfle.nxv1f16( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv1f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv1f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f16( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 
%4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv2f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv2f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f16( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv4f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv4f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f16( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv8f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv16f16( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv16f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv16f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f32( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv1f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f32( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv2f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f32( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv4f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f32( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv8f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f64( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, 
{{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv1f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv1f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f64( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv2f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv2f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f64( + , + , + i64); + +define @intrinsic_vmfle_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfle.nxv4f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfle.mask.nxv4f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vmfle_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vmfle_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret 
%a +} + +declare @llvm.riscv.vmfle.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vmfle_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vmfle_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vmfle_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vmfle_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vmfle_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vmfle_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vmfle_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vmfle_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vmfle_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv2f64.f64( + 
, + , + double, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfle.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vmfle_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfle.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfle.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfle_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfle.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfle.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll new file mode 100644 index 0000000000000..95d1046eb9925 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll @@ -0,0 +1,757 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmflt.nxv1f16( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv1f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv1f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f16( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv2f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv2f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f16( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv4f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv4f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f16( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv8f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv16f16( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv16f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv16f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f32( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv1f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f32( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a 
= call @llvm.riscv.vmflt.nxv2f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv2f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f32( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv4f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f32( + , + , + i32); + +define @intrinsic_vmflt_vv_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv8f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmflt.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vmflt_vf_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vmflt_vf_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f16.f16( + , 
+ , + half, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vmflt_vf_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vmflt_vf_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vmflt_vf_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vmflt_vf_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: 
vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vmflt_vf_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vmflt_vf_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vmflt_vf_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmflt_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll new file mode 100644 index 0000000000000..80eebbe0c8dc9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll @@ -0,0 +1,1009 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmflt.nxv1f16( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind 
{ +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv1f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv1f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f16( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv2f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv2f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f16( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv4f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv4f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f16( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv8f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv16f16( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv16f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli 
{{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv16f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f32( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv1f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f32( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv2f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f32( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv4f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f32( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + 
%mask = call @llvm.riscv.vmflt.nxv8f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f64( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv1f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv1f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f64( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv2f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv2f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f64( + , + , + i64); + +define @intrinsic_vmflt_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmflt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmflt.nxv4f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmflt.mask.nxv4f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vmflt_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + 
i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vmflt_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vmflt_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vmflt_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vmflt_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vmflt_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f32_f32 +; CHECK: vsetvli 
{{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vmflt_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vmflt_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vmflt_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vmflt_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare 
@llvm.riscv.vmflt.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vmflt_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmflt.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vmflt_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmflt.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmflt.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmflt_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmflt.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmflt.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll new file mode 100644 index 0000000000000..e98d68159bf48 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll @@ -0,0 +1,757 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfne.nxv1f16( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv1f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv1f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f16( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv2f16_nxv2f16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv2f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv2f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f16( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv4f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv4f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f16( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv8f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv16f16( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv16f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv16f16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f32( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, 
{{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv1f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f32( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv2f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f32( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv4f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f32( + , + , + i32); + +define @intrinsic_vmfne_vv_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv8f32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmfne.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vmfne_vf_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a 
+} + +declare @llvm.riscv.vmfne.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vmfne_vf_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vmfne_vf_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vmfne_vf_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vmfne_vf_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vmfne_vf_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vmfne_vf_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vmfne_vf_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vmfne_vf_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vmfne_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll new file mode 100644 index 0000000000000..59e85968efbfb --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll @@ -0,0 +1,1009 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmfne.nxv1f16( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv1f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv1f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f16( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv2f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv2f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f16( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv4f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv4f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f16( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv8f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv8f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv16f16( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv16f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv16f16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv16f16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f32( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv1f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv1f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f32( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv2f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv2f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f32( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, 
{{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv4f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv4f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f32( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv8f32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv8f32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f64( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv1f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv1f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f64( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv2f64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmfne.mask.nxv2f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f64( + , + , + i64); + +define @intrinsic_vmfne_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmfne.nxv4f64( + %1, + %2, + i64 %4) + %a = 
call @llvm.riscv.vmfne.mask.nxv4f64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vmfne_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vmfne_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vmfne_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vmfne_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vmfne_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmfne_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vmfne_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vmfne_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vmfne_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vmfne_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv8f32.f32( + %0, + float %1, + i64 
%2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vmfne_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vmfne_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmfne.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vmfne_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}} + %a = call @llvm.riscv.vmfne.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmfne.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vmfne_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmfne.vf {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0.t + %a = call @llvm.riscv.vmfne.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll new file mode 100644 index 0000000000000..bc96a2394d5eb --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll @@ -0,0 +1,1681 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmseq.nxv1i8( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv1i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i8( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv2i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i8( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv4i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i8( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv8i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i8( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i8( + %0, + %1, + i32 %2) + + ret 
%a +} + +declare @llvm.riscv.vmseq.mask.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv16i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv32i8( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv32i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i16( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv1i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i16( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv2i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i16( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i16( + , + , + , + , + i32); + +define 
@intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv4i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i16( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv8i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i16( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv16i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i32( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv1i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i32( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv2i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i32( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv4i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i32( + , + , + i32); + +define @intrinsic_vmseq_vv_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv8i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmseq.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmseq_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmseq_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = 
call @llvm.riscv.vmseq.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmseq_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmseq_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmseq_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmseq_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmseq_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vx 
{{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmseq_vx_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmseq_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmseq_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmseq_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmseq_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmseq_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmseq_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmseq_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmseq_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + 
ret %a +} + +define @intrinsic_vmseq_vi_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv1i32_i32( %0, i32 %1) nounwind { 
+entry:
+; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32.i32(
+    <vscale x 1 x i32> %0,
+    i32 9,
+    i32 %1)
+
+  ret <vscale x 1 x i1> %a
+}
+
+define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32.i32(
+    <vscale x 1 x i1> %0,
+    <vscale x 1 x i32> %1,
+    i32 9,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x i1> %a
+}
+
+define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i32_i32(<vscale x 2 x i32> %0, i32 %1) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9
+  %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32.i32(
+    <vscale x 2 x i32> %0,
+    i32 9,
+    i32 %1)
+
+  ret <vscale x 2 x i1> %a
+}
+
+define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t
+  %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32.i32(
+    <vscale x 2 x i1> %0,
+    <vscale x 2 x i32> %1,
+    i32 9,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x i1> %a
+}
+
+define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i32_i32(<vscale x 4 x i32> %0, i32 %1) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9
+  %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32(
+    <vscale x 4 x i32> %0,
+    i32 9,
+    i32 %1)
+
+  ret <vscale x 4 x i1> %a
+}
+
+define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t
+  %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32(
+    <vscale x 4 x i1> %0,
+    <vscale x 4 x i32> %1,
+    i32 9,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x i1> %a
+}
+
+define <vscale x 8 x i1> @intrinsic_vmseq_vi_nxv8i32_i32(<vscale x 8 x i32> %0, i32 %1) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9
+  %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32.i32(
+    <vscale x 8 x i32> %0,
+    i32 9,
+    i32 %1)
+
+  ret <vscale x 8 x i1> %a
+}
+
+define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu
+; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t
+  %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32(
+    <vscale x 8 x i1> %0,
+    <vscale x 8 x i32> %1,
+    i32 9,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x i1> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll
new file mode 100644
index 0000000000000..7e7fc156096aa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll
@@ -0,0 +1,2017 @@
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  i64);
+
+define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i64 %2) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    i64 %2)
+
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8(
+  <vscale x 1 x i1>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
+; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}},
{{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv1i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i8( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv2i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i8( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv4i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i8( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv8i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i8( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv16i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv16i8( + %0, + %2, + %3, + 
%mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv32i8( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv32i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i16( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv1i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i16( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv2i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i16( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv4i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i16( + , + , + i64); + +define 
@intrinsic_vmseq_vv_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv8i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i16( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv16i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i32( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv1i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i32( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv2i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i32( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmseq_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv4i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i32( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv8i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i64( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv1i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv1i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i64( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv2i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv2i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i64( + , + , + i64); + +define @intrinsic_vmseq_vv_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: 
vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmseq.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmseq.nxv4i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmseq.mask.nxv4i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmseq_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmseq_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmseq_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmseq_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmseq_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmseq_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmseq_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmseq_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vmseq_vx_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i16.i16( + , + i16, + i64); + 
+define @intrinsic_vmseq_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmseq_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vmseq_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vmseq_vx_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmseq_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call 
@llvm.riscv.vmseq.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmseq_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmseq_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmseq_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmseq_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmseq_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmseq.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmseq_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmseq.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmseq.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmseq_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmseq.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i8.i8( + %0, + %1, + 
i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + 
+define @intrinsic_vmseq_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + 
i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmseq_vi_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmseq.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmseq_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmseq.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmseq.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll new file mode 100644 index 0000000000000..13d77814f9729 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll @@ -0,0 +1,1021 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsgt.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgt_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgt_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i8.i8( + , + , + i8, + , + i32); 
+ +define @intrinsic_vmsgt_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgt_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgt_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgt_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgt_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + 
+declare @llvm.riscv.vmsgt.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgt_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgt_vx_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgt_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgt_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgt_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, 
{{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgt_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgt_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgt_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgt_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv32i8_i8 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i16_i16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll new file mode 100644 index 0000000000000..3a05a5e04c6b8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll @@ -0,0 +1,1225 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsgt.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgt_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv1i8.i8( + 
, + , + i8, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgt_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgt_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgt_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgt_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + 
+ ret %a +} + +declare @llvm.riscv.vmsgt.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgt_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgt_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgt_vx_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgt_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgt_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, 
{{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgt_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgt_vx_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgt_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgt_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgt_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmsgt_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmsgt_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmsgt_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgt.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + 
+define @intrinsic_vmsgt_vi_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsgt_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsgt_mask_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgt.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgt.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgt.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll new file mode 100644 index 0000000000000..6cd44a8d2d896 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll @@ -0,0 +1,1021 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsgtu.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgtu_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgtu_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgtu_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, 
{{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgtu_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgtu_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmsgtu_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgtu_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgtu_vx_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsgtu_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgtu_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgtu_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmsgtu_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgtu_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + 
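The element types in these declarations follow directly from the intrinsic suffix: nxv1i32 denotes <vscale x 1 x i32>, and every vmsgt/vmsgtu compare returns a <vscale x N x i1> mask. As a fully typed sketch of the unmasked vector-scalar test above (types inferred from the suffix, not quoted verbatim from the patch):

declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i32.i32(
  <vscale x 1 x i32>,
  i32,
  i32);

define <vscale x 1 x i1> @intrinsic_vmsgtu_vx_nxv1i32_i32(<vscale x 1 x i32> %0, i32 %1, i32 %2) nounwind {
entry:
; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i32_i32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu
; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}
  ; %0 is the vector operand, %1 the scalar operand, %2 the vector length
  ; (i32 here because this file targets riscv32).
  %a = call <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i32.i32(
    <vscale x 1 x i32> %0,
    i32 %1,
    i32 %2)

  ret <vscale x 1 x i1> %a
}

Each pair of tests in the file repeats this shape per element type and LMUL, with the masked variant adding a maskedoff vector, a <vscale x N x i1> mask operand, and a trailing v0.t on the expected instruction.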
+declare @llvm.riscv.vmsgtu.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgtu_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgtu_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmsgtu_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + 
ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define 
@intrinsic_vmsgtu_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call 
@llvm.riscv.vmsgtu.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll new file mode 100644 index 0000000000000..ca2d4c3c21568 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll @@ -0,0 +1,1225 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsgtu.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgtu_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgtu_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv2i8.i8( + 
%0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgtu_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgtu_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgtu_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmsgtu_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: 
vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgtu_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgtu_vx_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgtu_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmsgtu_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv16i16.i16( + , + i16, + i64); + +define 
@intrinsic_vmsgtu_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgtu_vx_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgtu_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgtu_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmsgtu_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + 
%a = call @llvm.riscv.vmsgtu.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmsgtu_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmsgtu_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmsgtu_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsgtu.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgtu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a 
= call @llvm.riscv.vmsgtu.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call 
@llvm.riscv.vmsgtu.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: 
vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i64_i64 +; CHECK: vsetvli 
{{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsgtu.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsgtu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsgtu.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll new file mode 100644 index 0000000000000..e86b0ef34494e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll @@ -0,0 +1,1681 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsle.nxv1i8( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv1i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i8( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv2i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i8( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv4i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i8( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: 
+; CHECK-LABEL: intrinsic_vmsle_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv8i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i8( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv16i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv32i8( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv32i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i16( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv1i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i16( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: 
vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv2i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i16( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv4i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i16( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv8i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i16( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv16i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i32( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i32( + 
%0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv1i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i32( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv2i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i32( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv4i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i32( + , + , + i32); + +define @intrinsic_vmsle_vv_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv8i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsle.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmsle_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i8.i8( + , + , + i8, + , + 
i32); + +define @intrinsic_vmsle_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmsle_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmsle_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmsle_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmsle_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + 
+declare @llvm.riscv.vmsle.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmsle_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmsle_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmsle_vx_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmsle_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmsle_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, 
{{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmsle_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmsle_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmsle_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmsle_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmsle_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsle_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i8.i8( 
+ %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret 
%a +} + +define @intrinsic_vmsle_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i32.i32( + %0, + %1, + i32 9, + 
%2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll new file mode 100644 index 0000000000000..2f33fb15d0fd4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll @@ -0,0 +1,2017 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsle.nxv1i8( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv1i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i8( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv2i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i8( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv4i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i8( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e8,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv8i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i8( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv16i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv32i8( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv32i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i16( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv1i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i16( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call 
@llvm.riscv.vmsle.nxv2i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i16( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv4i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i16( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv8i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i16( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv16i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i32( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv1i32( + %1, + %2, + i64 %4) + %a = call 
@llvm.riscv.vmsle.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i32( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv2i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i32( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv4i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i32( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv8i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i64( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv1i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv1i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vmsle.nxv2i64( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv2i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv2i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i64( + , + , + i64); + +define @intrinsic_vmsle_vv_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsle.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsle.nxv4i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsle.mask.nxv4i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmsle_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmsle_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmsle_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: 
vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmsle_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmsle_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmsle_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmsle_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i16_i16 
+; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vmsle_vx_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vmsle_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmsle_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vmsle_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i32.i32( + , + i32, + 
i64); + +define @intrinsic_vmsle_vx_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmsle_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmsle_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmsle_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmsle_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call 
@llvm.riscv.vmsle.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmsle_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsle.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmsle_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsle.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsle.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsle_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsle.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsle.vi 
{{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: 
vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsle_vi_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsle.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsle_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsle.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsle.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll new file mode 100644 index 0000000000000..7ac727aa494e5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll @@ -0,0 +1,1681 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsleu.nxv1i8( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmsleu_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv1i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i8( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv2i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i8( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv4i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i8( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv8i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i8( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: 
vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv16i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv32i8( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv32i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i16( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv1i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i16( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv2i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i16( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = 
call @llvm.riscv.vmsleu.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv4i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i16( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv8i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i16( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv16i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i32( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv1i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i32( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i32( + %0, + 
%1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv2i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i32( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv4i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i32( + , + , + i32); + +define @intrinsic_vmsleu_vv_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv8i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmsleu_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmsleu_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define 
@intrinsic_vmsleu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmsleu_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmsleu_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmsleu_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmsleu_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + 
%3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmsleu_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmsleu_vx_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmsleu_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmsleu_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmsleu_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv16i16_i16 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmsleu_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmsleu_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmsleu_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmsleu_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i32.i32( + , 
+ , + i32, + , + i32); + +define @intrinsic_vmsleu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i8.i8( + %0, + %1, + i8 
9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call 
@llvm.riscv.vmsleu.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll new file mode 100644 index 0000000000000..bc91e3f98696e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll @@ -0,0 +1,2017 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsleu.nxv1i8( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsleu_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv1i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i8( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv2i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i8( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv4i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i8( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv8i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i8( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vv 
{{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv16i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv32i8( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv32i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i16( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv1i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i16( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv2i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i16( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call 
@llvm.riscv.vmsleu.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv4i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i16( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv8i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i16( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv16i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i32( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv1i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i32( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i32( + %0, + %1, + 
i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv2i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i32( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv4i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i32( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv8i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i64( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv1i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv1i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i64( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare 
@llvm.riscv.vmsleu.mask.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv2i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv2i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i64( + , + , + i64); + +define @intrinsic_vmsleu_vv_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsleu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsleu.nxv4i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsleu.mask.nxv4i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmsleu_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmsleu_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmsleu_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsleu_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmsleu_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmsleu_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmsleu_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmsleu_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i16.i16( + , 
+ i16, + i64); + +define @intrinsic_vmsleu_vx_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vmsleu_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmsleu_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vmsleu_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vmsleu_vx_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, 
{{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmsleu_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmsleu_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmsleu_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmsleu_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( %0, %1, 
i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmsleu_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsleu.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmsleu_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsleu.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsleu.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsleu_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsleu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define 
@intrinsic_vmsleu_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + 
ret %a +} + +define @intrinsic_vmsleu_vi_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv1i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv4i32.i32( + 
%0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsleu_vi_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsleu.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsleu_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsleu.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsleu.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll new file mode 100644 index 0000000000000..68997a6e45556 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll @@ -0,0 +1,1261 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmslt.nxv1i8( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv1i8_nxv1i8 +; CHECK: 
vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv1i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i8( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv2i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i8( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv4i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i8( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv8i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i8( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i8( + %0, + %1, + 
i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv16i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv32i8( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv32i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i16( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv1i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i16( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv2i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i16( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i16( + , + , + , + , + i32); + +define 
@intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv4i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i16( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv8i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i16( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv16i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i32( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv1i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i32( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv2i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i32( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv4i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i32( + , + , + i32); + +define @intrinsic_vmslt_vv_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv8i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmslt.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmslt_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmslt_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = 
call @llvm.riscv.vmslt.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmslt_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmslt_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmslt_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmslt_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmslt_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmslt.vx 
{{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmslt_vx_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmslt_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmslt_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmslt_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmslt_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmslt_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmslt_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmslt_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmslt_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + 
ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll new file mode 100644 index 0000000000000..78178c05c58b7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll @@ -0,0 +1,1513 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmslt.nxv1i8( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv1i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i8( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv2i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i8( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv4i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i8( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; 
CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv8i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i8( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv16i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv32i8( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv32i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i16( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv1i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i16( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv2i16( + 
%1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i16( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv4i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i16( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv8i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i16( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv16i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i32( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv1i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv1i32( + %0, + %2, + %3, + 
%mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i32( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv2i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i32( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv4i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i32( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv8i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i64( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv1i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv1i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i64( + , + , + i64); + +define 
@intrinsic_vmslt_vv_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv2i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv2i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i64( + , + , + i64); + +define @intrinsic_vmslt_vv_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmslt.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmslt.nxv4i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmslt.mask.nxv4i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmslt_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmslt_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmslt_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = 
call @llvm.riscv.vmslt.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmslt_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmslt_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmslt_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmslt_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu 
+; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vmslt_vx_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vmslt_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmslt_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vmslt_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vmslt_vx_nxv1i32_i32( %0, 
i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmslt_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmslt_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmslt_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmslt_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + 
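; Illustrative sketch only (hypothetical @sketch_* wrapper; the explicit
; <vscale x 1 x i64> / <vscale x 1 x i1> types are inferred from the
; intrinsic's nxv1i64.i64 mangling and should be read as an assumption):
; the vector-scalar (vx) form takes the vector operand, the scalar to
; compare against, and the vector length, and yields a scalable mask.
declare <vscale x 1 x i1> @llvm.riscv.vmslt.nxv1i64.i64(
  <vscale x 1 x i64>,
  i64,
  i64);

define <vscale x 1 x i1> @sketch_vmslt_vx_nxv1i64_i64(<vscale x 1 x i64> %vec, i64 %scalar, i64 %vl) nounwind {
entry:
  ; Signed less-than of each of the first %vl elements of %vec against %scalar.
  %mask = call <vscale x 1 x i1> @llvm.riscv.vmslt.nxv1i64.i64(
    <vscale x 1 x i64> %vec,
    i64 %scalar,
    i64 %vl)
  ret <vscale x 1 x i1> %mask
}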
+declare @llvm.riscv.vmslt.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmslt_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmslt.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmslt_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmslt.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmslt.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmslt_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmslt.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmslt.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll new file mode 100644 index 0000000000000..5bb1497604ed0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll @@ -0,0 +1,1261 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsltu.nxv1i8( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv1i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i8( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vv 
{{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv2i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i8( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv4i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i8( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv8i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i8( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv16i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv16i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv32i8( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv32i8( + %0, + %1, + i32 
%2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv32i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i16( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv1i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i16( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv2i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i16( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv4i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i16( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare 
@llvm.riscv.vmsltu.mask.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv8i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i16( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv16i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i32( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv1i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i32( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv2i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i32( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i32( + , + , + 
, + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv4i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i32( + , + , + i32); + +define @intrinsic_vmsltu_vv_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv8i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmsltu_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmsltu_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmsltu_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmsltu_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmsltu_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmsltu_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmsltu_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmsltu_vx_nxv2i16_i16( 
%0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmsltu_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmsltu_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmsltu_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmsltu_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call 
@llvm.riscv.vmsltu.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmsltu_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmsltu_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmsltu_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsltu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll new file mode 100644 index 0000000000000..e085ab569720f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll @@ -0,0 +1,1513 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsltu.nxv1i8( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv1i8_nxv1i8 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv1i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i8( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv2i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i8( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv4i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i8( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv8i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i8( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + 
%a = call @llvm.riscv.vmsltu.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv16i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv32i8( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv32i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i16( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv1i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i16( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv2i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i16( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i16( + %0, + %1, + i64 
%2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv4i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i16( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv8i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i16( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv16i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i32( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv1i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i32( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare 
@llvm.riscv.vmsltu.mask.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv2i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i32( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv4i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i32( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv8i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i64( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv1i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv1i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i64( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i64( + , + , + , + , + i64); + 
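; Illustrative sketch only (hypothetical @sketch_* wrapper; the explicit
; <vscale x 2 x i64> / <vscale x 2 x i1> types are inferred from the nxv2i64
; mangling, so treat them as an assumption): the masked vector-vector form
; carries the previous mask value to merge into, the two vector operands,
; the governing mask, and the vector length.
declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64(
  <vscale x 2 x i1>,
  <vscale x 2 x i64>,
  <vscale x 2 x i64>,
  <vscale x 2 x i1>,
  i64);

define <vscale x 2 x i1> @sketch_vmsltu_mask_vv_nxv2i64(<vscale x 2 x i1> %maskedoff, <vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i1> %mask, i64 %vl) nounwind {
entry:
  ; Unsigned less-than under %mask; inactive lanes keep their value from %maskedoff.
  %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64(
    <vscale x 2 x i1> %maskedoff,
    <vscale x 2 x i64> %op1,
    <vscale x 2 x i64> %op2,
    <vscale x 2 x i1> %mask,
    i64 %vl)
  ret <vscale x 2 x i1> %a
}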
+define @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv2i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv2i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i64( + , + , + i64); + +define @intrinsic_vmsltu_vv_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsltu.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsltu.nxv4i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsltu.mask.nxv4i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmsltu_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmsltu_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmsltu_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e8,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmsltu_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmsltu_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmsltu_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmsltu_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vmsltu_vx_nxv2i16_i16( %0, i16 %1, i64 
%2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vmsltu_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmsltu_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vmsltu_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vmsltu_vx_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i32.i32( + %0, 
+ i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmsltu_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmsltu_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmsltu_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmsltu_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsltu_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmsltu_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsltu.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmsltu_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsltu.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsltu.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsltu_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsltu.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsltu.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll new file mode 100644 index 0000000000000..0d36d104357fd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll @@ -0,0 +1,1681 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsne.nxv1i8( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv1i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i8( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i8( + , + , + , + , + i32); + 
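The masked variants in this file all follow the same shape: the unmasked intrinsic first computes a mask, which the .mask intrinsic then consumes (the v0.t operand in the CHECK line). A sketch of the nxv1i8 case with the types written out explicitly (element and mask types inferred from the intrinsic suffix; the i32 is the vector-length operand on the rv32 target):

declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i8(
  <vscale x 1 x i1>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i1>,
  i32);

define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, i32 %4) nounwind {
entry:
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
  ; The first compare produces the mask consumed by the second, masked compare.
  %mask = call <vscale x 1 x i1> @llvm.riscv.vmsne.nxv1i8(
    <vscale x 1 x i8> %1,
    <vscale x 1 x i8> %2,
    i32 %4)
  %a = call <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i8(
    <vscale x 1 x i1> %0,
    <vscale x 1 x i8> %2,
    <vscale x 1 x i8> %3,
    <vscale x 1 x i1> %mask,
    i32 %4)

  ret <vscale x 1 x i1> %a
}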
+define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv2i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i8( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv4i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i8( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv8i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i8( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv16i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv32i8( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv32i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv32i8( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i16( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv1i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i16( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv2i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i16( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv4i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i16( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; 
CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv8i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i16( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv16i16( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i32( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i32( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv1i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i32( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i32( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv2i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i32( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i32( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call 
@llvm.riscv.vmsne.nxv4i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i32( + , + , + i32); + +define @intrinsic_vmsne_vv_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i32( + , + , + , + , + i32); + +define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv8i32( + %1, + %2, + i32 %4) + %a = call @llvm.riscv.vmsne.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vmsne_vx_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i8.i8( + , + i8, + i32); + +define @intrinsic_vmsne_vx_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i8.i8( + , + i8, + i32); + +define @intrinsic_vmsne_vx_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i8.i8( + , + i8, + i32); + +define @intrinsic_vmsne_vx_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmsne_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vmsne_vx_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vmsne_vx_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vmsne_vx_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vmsne_vx_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define 
@intrinsic_vmsne_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vmsne_vx_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vmsne_vx_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vmsne_vx_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vmsne_vx_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vmsne.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vmsne_vx_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vmsne_vx_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vmsne_vx_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vmsne_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv1i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv1i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv2i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv2i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: 
+; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv4i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv4i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv8i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv8i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv16i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv16i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv32i8_i8( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv32i8.i8( + %0, + i8 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv1i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv1i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv2i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv2i16_i16 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv2i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv4i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv4i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv8i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv8i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv16i16_i16( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv16i16.i16( + %0, + i16 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv1i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv1i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv2i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv2i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i32_i32 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv4i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv4i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv8i32_i32( %0, i32 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv8i32.i32( + %0, + i32 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll new file mode 100644 index 0000000000000..960b9d0d03d6d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll @@ -0,0 +1,2017 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmsne.nxv1i8( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv1i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv1i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i8( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv2i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv2i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + 
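Besides the vector-vector form, this rv64 file also exercises the vector-scalar (vx) form of the compare. A sketch of one unmasked vx test with explicit types (nxv1i8 element type and i64 vector-length operand inferred from the intrinsic name; illustrative only):

declare <vscale x 1 x i1> @llvm.riscv.vmsne.nxv1i8.i8(
  <vscale x 1 x i8>,
  i8,
  i64);

define <vscale x 1 x i1> @intrinsic_vmsne_vx_nxv1i8_i8(<vscale x 1 x i8> %0, i8 %1, i64 %2) nounwind {
entry:
; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i8_i8
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu
; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}
  ; vmsne.vx compares each vector element against the scalar held in a GPR,
  ; so the scalar i8 operand shows up as an a-register in the CHECK line.
  %a = call <vscale x 1 x i1> @llvm.riscv.vmsne.nxv1i8.i8(
    <vscale x 1 x i8> %0,
    i8 %1,
    i64 %2)

  ret <vscale x 1 x i1> %a
}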
+declare @llvm.riscv.vmsne.nxv4i8( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv4i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv4i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i8( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv8i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv8i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i8( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv16i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv16i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv32i8( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv32i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv32i8( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv32i8( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i16( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +entry: 
+; CHECK-LABEL: intrinsic_vmsne_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv1i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv1i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i16( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv2i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv2i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i16( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv4i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv4i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i16( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv8i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv8i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i16( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e16,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv16i16( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv16i16( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i32( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv1i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv1i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i32( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv2i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv2i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i32( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv4i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv4i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i32( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call 
@llvm.riscv.vmsne.nxv8i32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv8i32( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv8i32( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i64( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i64( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv1i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv1i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i64( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i64( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv2i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv2i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i64( + , + , + i64); + +define @intrinsic_vmsne_vv_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i64( + , + , + , + , + i64); + +define @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsne.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %mask = call @llvm.riscv.vmsne.nxv4i64( + %1, + %2, + i64 %4) + %a = call @llvm.riscv.vmsne.mask.nxv4i64( + %0, + %2, + %3, + %mask, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vmsne_vx_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare 
@llvm.riscv.vmsne.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vmsne_vx_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vmsne_vx_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vmsne_vx_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vmsne_vx_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i8.i8( + 
%0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vmsne_vx_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vmsne_vx_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vmsne_vx_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vmsne_vx_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vmsne_vx_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; 
CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vmsne_vx_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vmsne_vx_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vmsne_vx_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vmsne_vx_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv4i32_i32( %0, %1, i32 
%2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vmsne_vx_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vmsne_vx_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vmsne_vx_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmsne.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vmsne_vx_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}} + %a = call @llvm.riscv.vmsne.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmsne.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vmsne_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsne.vx {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + 
i64 %4) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv1i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv1i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv2i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv2i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv4i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv4i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv8i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv8i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv16i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv16i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv32i8_i8( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv32i8.i8( + %0, + i8 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv32i8.i8( + %0, + %1, + i8 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv1i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv1i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv2i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv2i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv4i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv4i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv8i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv8i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv16i16_i16( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv16i16.i16( + %0, + i16 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv16i16.i16( + %0, + %1, + i16 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv1i32_i32( %0, i64 %1) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv1i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv2i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv2i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv4i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv4i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv8i32_i32( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv8i32.i32( + %0, + i32 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv8i32.i32( + %0, + %1, + i32 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv1i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv1i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv2i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv2i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmsne_vi_nxv4i64_i64( %0, i64 %1) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9 + %a = call @llvm.riscv.vmsne.nxv4i64.i64( + %0, + i64 9, + i64 %1) + + ret %a +} + +define @intrinsic_vmsne_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vmsne.vi {{v[0-9]+}}, {{v[0-9]+}}, 9, v0.t + %a = call @llvm.riscv.vmsne.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i64 %3) + + ret %a +} From 442aac5da68c467563dc6fedf37892ee3d2b688b Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Sat, 19 Dec 2020 00:03:45 +0530 Subject: [PATCH 064/378] [Flang][openmp][1/5] Make Allocate clause part of OmpClause After discussion in `D93482` we found that some of the clauses were not following the common OmpClause convention. The benefits of using OmpClause: - Functionalities from the structure checker are mostly aligned to work with `llvm::omp::Clause`. - The unparsing can take advantage as well. - Homogeneity with OpenACC and the rest of the clauses in OpenMP. - Could even generate the parser with TableGen, when there is homogeneity. - It becomes confusing when to use `flangClass` and `flangClassValue` inside TableGen; if we generate the parser using TableGen we could have only a single `let` expression. This patch makes the `allocate` clause part of `OmpClause`. The unparse function for `OmpAllocateClause` is adapted since the keyword and parentheses are issued by the corresponding unparse function for `parser::OmpClause::Allocate`.
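As a rough standalone illustration of that split (toy types only; these are not Flang's real parse-tree classes, nor its Word/Walk/Put helpers), the wrapper corresponding to parser::OmpClause::Allocate emits the keyword and parentheses, while the payload's unparser prints just its contents:

#include <iostream>
#include <string>

// Toy stand-in for OmpAllocateClause: an optional allocator plus an object list.
struct ToyAllocateClause {
  std::string allocator; // empty means "no allocator modifier"
  std::string objects;
};

// Payload unparser: contents only, no keyword and no parentheses.
void Unparse(const ToyAllocateClause &x) {
  if (!x.allocator.empty())
    std::cout << x.allocator << ':';
  std::cout << x.objects;
}

// Toy stand-in for the OmpClause::Allocate wrapper: issues "ALLOCATE(...)"
// and delegates the contents to the payload's unparser.
struct ToyClauseAllocate {
  ToyAllocateClause v;
};
void Unparse(const ToyClauseAllocate &x) {
  std::cout << "ALLOCATE(";
  Unparse(x.v);
  std::cout << ')';
}

int main() {
  Unparse(ToyClauseAllocate{{"omp_default_mem_alloc", "a, b"}});
  std::cout << '\n'; // prints: ALLOCATE(omp_default_mem_alloc:a, b)
}

Keeping the keyword in the wrapper lets every OmpClause member be unparsed uniformly, which is what the structure-checker alignment and TableGen homogeneity points above rely on.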
Reviewed By: clementval Differential Revision: https://reviews.llvm.org/D93640 --- flang/lib/Parser/openmp-parsers.cpp | 4 ++-- flang/lib/Parser/unparse.cpp | 5 ++--- flang/lib/Semantics/check-omp-structure.cpp | 2 +- flang/lib/Semantics/check-omp-structure.h | 2 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 67c377e798cae..ff8ba774a6ce8 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -157,8 +157,8 @@ TYPE_PARSER( "ACQ_REL" >> construct(construct()) || "ALIGNED" >> construct(parenthesized(Parser{})) || - "ALLOCATE" >> - construct(parenthesized(Parser{})) || + "ALLOCATE" >> construct(construct( + parenthesized(Parser{}))) || "ALLOCATOR" >> construct(construct( parenthesized(scalarIntExpr))) || "COLLAPSE" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index a027c8fc9af69..ed17bac92965c 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2020,10 +2020,9 @@ class UnparseVisitor { Put(")"); } void Unparse(const OmpAllocateClause &x) { - Word("ALLOCATE("); - Walk(std::get>(x.t), ":"); + Walk(std::get>(x.t)); + Put(":"); Walk(std::get(x.t)); - Put(")"); } void Unparse(const OmpDependSinkVecLength &x) { Walk(std::get(x.t)); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 978e1c7962a49..58db754593188 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -403,6 +403,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) { // Following clauses do not have a seperate node in parse-tree.h. // They fall under 'struct OmpClause' in parse-tree.h. +CHECK_SIMPLE_CLAUSE(Allocate, OMPC_allocate) CHECK_SIMPLE_CLAUSE(Copyin, OMPC_copyin) CHECK_SIMPLE_CLAUSE(Copyprivate, OMPC_copyprivate) CHECK_SIMPLE_CLAUSE(Device, OMPC_device) @@ -489,7 +490,6 @@ void OmpStructureChecker::CheckIsVarPartOfAnotherVar( } } // Following clauses have a seperate node in parse-tree.h. 
-CHECK_SIMPLE_PARSER_CLAUSE(OmpAllocateClause, OMPC_allocate) CHECK_SIMPLE_PARSER_CLAUSE(OmpDefaultClause, OMPC_default) CHECK_SIMPLE_PARSER_CLAUSE(OmpDistScheduleClause, OMPC_dist_schedule) CHECK_SIMPLE_PARSER_CLAUSE(OmpNowait, OMPC_nowait) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 2949568f60447..32da0fb009548 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -127,6 +127,7 @@ class OmpStructureChecker void Leave(const parser::OmpClauseList &); void Enter(const parser::OmpClause &); void Enter(const parser::OmpNowait &); + void Enter(const parser::OmpClause::Allocate &); void Enter(const parser::OmpClause::Allocator &); void Enter(const parser::OmpClause::Inbranch &); void Enter(const parser::OmpClause::Mergeable &); @@ -174,7 +175,6 @@ class OmpStructureChecker void Enter(const parser::OmpAtomicCapture &); void Leave(const parser::OmpAtomic &); void Enter(const parser::OmpAlignedClause &); - void Enter(const parser::OmpAllocateClause &); void Enter(const parser::OmpDefaultClause &); void Enter(const parser::OmpDefaultmapClause &); void Enter(const parser::OmpDependClause &); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 58aa1bf23b685..6ad8fa92084b1 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -254,7 +254,7 @@ def OMPC_AtomicDefaultMemOrder : Clause<"atomic_default_mem_order"> { } def OMPC_Allocate : Clause<"allocate"> { let clangClass = "OMPAllocateClause"; - let flangClass = "OmpAllocateClause"; + let flangClassValue = "OmpAllocateClause"; } def OMPC_NonTemporal : Clause<"nontemporal"> { let clangClass = "OMPNontemporalClause"; From f72c384b5ba943c92fadcbe77f9d7661728905ab Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Mon, 21 Dec 2020 14:10:27 +0530 Subject: [PATCH 065/378] [Flang][openmp][2/5] Make Default clause part of OmpClause After discussion in `D93482` we found that some of the clauses were not following the common OmpClause convention. The benefits of using OmpClause: - Functionalities from the structure checker are mostly aligned to work with `llvm::omp::Clause`. - The unparsing can take advantage as well. - Homogeneity with OpenACC and the rest of the clauses in OpenMP. - Could even generate the parser with TableGen, when there is homogeneity. - It becomes confusing when to use `flangClass` and `flangClassValue` inside TableGen; if we generate the parser using TableGen we could have only a single `let` expression. This patch makes the `OmpDefaultClause` clause part of `OmpClause`. The unparse function is dropped as the unparsing is done by `WALK_NESTED_ENUM` for `OmpDefaultClause`. Reviewed By: clementval, kiranktp Differential Revision: https://reviews.llvm.org/D93641 --- flang/lib/Lower/OpenMP.cpp | 5 +++-- flang/lib/Parser/openmp-parsers.cpp | 4 ++-- flang/lib/Parser/unparse.cpp | 5 ----- flang/lib/Semantics/check-omp-structure.cpp | 2 +- flang/lib/Semantics/check-omp-structure.h | 2 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 2 +- 6 files changed, 8 insertions(+), 12 deletions(-) diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 97946caa68a07..f73dd09fbe68e 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -191,8 +191,9 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Handle attribute based clauses.
for (const auto &clause : parallelOpClauseList.v) { if (const auto &defaultClause = - std::get_if(&clause.u)) { - switch (defaultClause->v) { + std::get_if(&clause.u)) { + const auto &ompDefaultClause{defaultClause->v}; + switch (ompDefaultClause.v) { case Fortran::parser::OmpDefaultClause::Type::Private: parallelOp.default_valAttr(firOpBuilder.getStringAttr( omp::stringifyClauseDefault(omp::ClauseDefault::defprivate))); diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index ff8ba774a6ce8..e982dd19e4980 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -167,8 +167,8 @@ TYPE_PARSER( parenthesized(Parser{}))) || "COPYPRIVATE" >> construct(construct( (parenthesized(Parser{})))) || - "DEFAULT"_id >> - construct(parenthesized(Parser{})) || + "DEFAULT"_id >> construct(construct( + parenthesized(Parser{}))) || "DEFAULTMAP" >> construct(parenthesized(Parser{})) || "DEPEND" >> diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ed17bac92965c..a4b0c64011fc3 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2058,11 +2058,6 @@ class UnparseVisitor { }, x.u); } - bool Pre(const OmpDefaultClause &) { - Word("DEFAULT("); - return true; - } - void Post(const OmpDefaultClause &) { Put(")"); } bool Pre(const OmpProcBindClause &) { Word("PROC_BIND("); return true; diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 58db754593188..6ed7106bb9f47 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -406,6 +406,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) { CHECK_SIMPLE_CLAUSE(Allocate, OMPC_allocate) CHECK_SIMPLE_CLAUSE(Copyin, OMPC_copyin) CHECK_SIMPLE_CLAUSE(Copyprivate, OMPC_copyprivate) +CHECK_SIMPLE_CLAUSE(Default, OMPC_default) CHECK_SIMPLE_CLAUSE(Device, OMPC_device) CHECK_SIMPLE_CLAUSE(Final, OMPC_final) CHECK_SIMPLE_CLAUSE(Firstprivate, OMPC_firstprivate) @@ -490,7 +491,6 @@ void OmpStructureChecker::CheckIsVarPartOfAnotherVar( } } // Following clauses have a seperate node in parse-tree.h. 
-CHECK_SIMPLE_PARSER_CLAUSE(OmpDefaultClause, OMPC_default) CHECK_SIMPLE_PARSER_CLAUSE(OmpDistScheduleClause, OMPC_dist_schedule) CHECK_SIMPLE_PARSER_CLAUSE(OmpNowait, OMPC_nowait) CHECK_SIMPLE_PARSER_CLAUSE(OmpProcBindClause, OMPC_proc_bind) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 32da0fb009548..dcc2deeb348c4 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -137,6 +137,7 @@ class OmpStructureChecker void Enter(const parser::OmpClause::Collapse &); void Enter(const parser::OmpClause::Copyin &); void Enter(const parser::OmpClause::Copyprivate &); + void Enter(const parser::OmpClause::Default &); void Enter(const parser::OmpClause::Device &); void Enter(const parser::OmpClause::Final &); void Enter(const parser::OmpClause::Firstprivate &); @@ -175,7 +176,6 @@ class OmpStructureChecker void Enter(const parser::OmpAtomicCapture &); void Leave(const parser::OmpAtomic &); void Enter(const parser::OmpAlignedClause &); - void Enter(const parser::OmpDefaultClause &); void Enter(const parser::OmpDefaultmapClause &); void Enter(const parser::OmpDependClause &); void Enter(const parser::OmpDistScheduleClause &); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 6ad8fa92084b1..f06990068b40f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -62,7 +62,7 @@ def OMPC_Collapse : Clause<"collapse"> { } def OMPC_Default : Clause<"default"> { let clangClass = "OMPDefaultClause"; - let flangClass = "OmpDefaultClause"; + let flangClassValue = "OmpDefaultClause"; } def OMPC_Private : Clause<"private"> { let clangClass = "OMPPrivateClause"; From b8c37153d5393aad96feefe0b4689b7b62bc160d Mon Sep 17 00:00:00 2001 From: Quentin Chateau Date: Tue, 22 Dec 2020 08:44:20 +0100 Subject: [PATCH 066/378] [clangd] Trim memory periodically when using glibc malloc This diff addresses the issue of the ever increasing memory usage of clangd. The key to understand what happens is to use `malloc_stats()`: malloc arenas keep getting bigger, although the actual memory used does not. It seems some operations while bulding the indices (both dynamic and background) create this problem. Specifically, 'FileSymbols::update' and 'FileSymbols::buildIndex' seem especially affected. This diff adds a call to `malloc_trim()` periodically in ClangdLSPServer. Fixes: https://github.com/clangd/clangd/issues/251 Fixes: https://github.com/clangd/clangd/issues/115 Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D93452 --- clang-tools-extra/clangd/CMakeLists.txt | 3 ++ clang-tools-extra/clangd/ClangdLSPServer.cpp | 32 ++++++++++++++++++-- clang-tools-extra/clangd/ClangdLSPServer.h | 11 +++++++ clang-tools-extra/clangd/Features.inc.in | 1 + clang-tools-extra/clangd/tool/ClangdMain.cpp | 28 +++++++++++++++++ 5 files changed, 73 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 919457f216c15..9e62e09480274 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -14,9 +14,12 @@ if (NOT DEFINED CLANGD_BUILD_XPC) unset(CLANGD_BUILD_XPC_DEFAULT) endif () +option(CLANGD_MALLOC_TRIM "Call malloc_trim(3) periodically in Clangd. 
(only takes effect when using glibc)" ON) + llvm_canonicalize_cmake_booleans( CLANGD_BUILD_XPC CLANGD_ENABLE_REMOTE + CLANGD_MALLOC_TRIM LLVM_ENABLE_ZLIB ) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index b32c9e13973b6..0c42f95fb5947 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -178,6 +178,7 @@ class ClangdLSPServer::MessageHandler : public Transport::MessageHandler { } else if (auto Handler = Notifications.lookup(Method)) { Handler(std::move(Params)); Server.maybeExportMemoryProfile(); + Server.maybeCleanupMemory(); } else { log("unhandled notification {0}", Method); } @@ -453,6 +454,7 @@ void ClangdLSPServer::callRaw(StringRef Method, llvm::json::Value Params, void ClangdLSPServer::notify(llvm::StringRef Method, llvm::json::Value Params) { log("--> {0}", Method); + maybeCleanupMemory(); std::lock_guard Lock(TranspWriter); Transp.notify(Method, std::move(Params)); } @@ -1301,6 +1303,27 @@ void ClangdLSPServer::maybeExportMemoryProfile() { NextProfileTime = Now + ProfileInterval; } +void ClangdLSPServer::maybeCleanupMemory() { + // Memory cleanup is probably expensive, throttle it + static constexpr auto MemoryCleanupInterval = std::chrono::minutes(1); + + if (!Opts.MemoryCleanup) + return; + + // FIXME: this can probably be done without a mutex + // and the logic could be shared with maybeExportMemoryProfile + { + auto Now = std::chrono::steady_clock::now(); + std::lock_guard Lock(NextMemoryCleanupTimeMutex); + if (Now < NextMemoryCleanupTime) + return; + NextMemoryCleanupTime = Now + MemoryCleanupInterval; + } + + vlog("Calling memory cleanup callback"); + Opts.MemoryCleanup(); +} + // FIXME: This function needs to be properly tested. void ClangdLSPServer::onChangeConfiguration( const DidChangeConfigurationParams &Params) { @@ -1507,8 +1530,9 @@ ClangdLSPServer::ClangdLSPServer(class Transport &Transp, MsgHandler->bind("textDocument/foldingRange", &ClangdLSPServer::onFoldingRange); // clang-format on - // Delay first profile until we've finished warming up. - NextProfileTime = std::chrono::steady_clock::now() + std::chrono::minutes(1); + // Delay first profile and memory cleanup until we've finished warming up. + NextMemoryCleanupTime = NextProfileTime = + std::chrono::steady_clock::now() + std::chrono::minutes(1); } ClangdLSPServer::~ClangdLSPServer() { @@ -1621,6 +1645,10 @@ void ClangdLSPServer::onDiagnosticsReady(PathRef File, llvm::StringRef Version, void ClangdLSPServer::onBackgroundIndexProgress( const BackgroundQueue::Stats &Stats) { static const char ProgressToken[] = "backgroundIndexProgress"; + + // The background index did some work, maybe we need to cleanup + maybeCleanupMemory(); + std::lock_guard Lock(BackgroundIndexProgressMutex); auto NotifyProgress = [this](const BackgroundQueue::Stats &Stats) { diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index e65fc0e8a0064..b5f9d2c9d766a 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -48,6 +48,9 @@ class ClangdLSPServer : private ClangdServer::Callbacks { llvm::Optional CompileCommandsDir; /// The offset-encoding to use, or None to negotiate it over LSP. llvm::Optional Encoding; + /// If set, periodically called to release memory. + /// Consider malloc_trim(3) + std::function MemoryCleanup = nullptr; /// Per-feature options. 
Generally ClangdServer lets these vary /// per-request, but LSP allows limited/no customizations. @@ -184,10 +187,18 @@ class ClangdLSPServer : private ClangdServer::Callbacks { /// profiling hasn't happened recently. void maybeExportMemoryProfile(); + /// Run the MemoryCleanup callback if it's time. + /// This method is thread safe. + void maybeCleanupMemory(); + /// Timepoint until which profiling is off. It is used to throttle profiling /// requests. std::chrono::steady_clock::time_point NextProfileTime; + /// Next time we want to call the MemoryCleanup callback. + std::mutex NextMemoryCleanupTimeMutex; + std::chrono::steady_clock::time_point NextMemoryCleanupTime; + /// Since initialization of CDBs and ClangdServer is done lazily, the /// following context captures the one used while creating ClangdLSPServer and /// passes it to above mentioned object instances to make sure they share the diff --git a/clang-tools-extra/clangd/Features.inc.in b/clang-tools-extra/clangd/Features.inc.in index 6797232ddac7c..c21d2b1455710 100644 --- a/clang-tools-extra/clangd/Features.inc.in +++ b/clang-tools-extra/clangd/Features.inc.in @@ -1,2 +1,3 @@ #define CLANGD_BUILD_XPC @CLANGD_BUILD_XPC@ #define CLANGD_ENABLE_REMOTE @CLANGD_ENABLE_REMOTE@ +#define CLANGD_MALLOC_TRIM @CLANGD_MALLOC_TRIM@ diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 331241115302b..d2c52cf61c53b 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -50,6 +50,10 @@ #include #endif +#ifdef __GLIBC__ +#include +#endif + namespace clang { namespace clangd { @@ -497,6 +501,29 @@ opt CollectMainFileRefs{ init(ClangdServer::Options().CollectMainFileRefs), }; +#if defined(__GLIBC__) && CLANGD_MALLOC_TRIM +opt EnableMallocTrim{ + "malloc-trim", + cat(Misc), + desc("Release memory periodically via malloc_trim(3)."), + init(true), +}; + +std::function getMemoryCleanupFunction() { + if (!EnableMallocTrim) + return nullptr; + // Leave a few MB at the top of the heap: it is insignificant + // and will most likely be needed by the main thread + constexpr size_t MallocTrimPad = 20'000'000; + return []() { + if (malloc_trim(MallocTrimPad)) + vlog("Released memory via malloc_trim"); + }; +} +#else +std::function getMemoryCleanupFunction() { return nullptr; } +#endif + #if CLANGD_ENABLE_REMOTE opt RemoteIndexAddress{ "remote-index-address", @@ -797,6 +824,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var Opts.BuildRecoveryAST = RecoveryAST; Opts.PreserveRecoveryASTType = RecoveryASTType; Opts.FoldingRanges = FoldingRanges; + Opts.MemoryCleanup = getMemoryCleanupFunction(); Opts.CodeComplete.IncludeIneligibleResults = IncludeIneligibleResults; Opts.CodeComplete.Limit = LimitResults; From 34958d11c3457c8e05bbe2b31d5e013c04aecb55 Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Mon, 21 Dec 2020 18:43:44 +0530 Subject: [PATCH 067/378] [Flang][openmp][3/5] Make ProcBind clause part of OmpClause After discussion in `D93482` we found that the some of the clauses were not following the common OmpClause convention. The benefits of using OmpClause: - Functionalities from structure checker are mostly aligned to work with `llvm::omp::Clause`. - The unparsing as well can take advantage. - Homogeneity with OpenACC and rest of the clauses in OpenMP. - Could even generate the parser with TableGen, when there is homogeneity. 
- It becomes confusing when to use `flangClass` and `flangClassValue` inside TableGen, if incase we generate parser using TableGen we could have only a single `let expression`. This patch makes `OmpProcBindClause` clause part of `OmpClause`. The unparse function is dropped as the unparsing is done by `WALK_NESTED_ENUM` for `OmpProcBindClause`. Reviewed By: clementval, kiranktp Differential Revision: https://reviews.llvm.org/D93642 --- flang/lib/Lower/OpenMP.cpp | 5 +++-- flang/lib/Parser/openmp-parsers.cpp | 4 ++-- flang/lib/Parser/unparse.cpp | 5 ----- flang/lib/Semantics/check-omp-structure.cpp | 2 +- flang/lib/Semantics/check-omp-structure.h | 2 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 2 +- 6 files changed, 8 insertions(+), 12 deletions(-) diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index f73dd09fbe68e..f765723bb9ae8 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -214,8 +214,9 @@ genOMP(Fortran::lower::AbstractConverter &converter, } } if (const auto &procBindClause = - std::get_if(&clause.u)) { - switch (procBindClause->v) { + std::get_if(&clause.u)) { + const auto &ompProcBindClause{procBindClause->v}; + switch (ompProcBindClause.v) { case Fortran::parser::OmpProcBindClause::Type::Master: parallelOp.proc_bind_valAttr( firOpBuilder.getStringAttr(omp::stringifyClauseProcBindKind( diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index e982dd19e4980..50999bef8f527 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -216,8 +216,8 @@ TYPE_PARSER( parenthesized(scalarIntExpr))) || "PRIVATE" >> construct(construct( parenthesized(Parser{}))) || - "PROC_BIND" >> - construct(parenthesized(Parser{})) || + "PROC_BIND" >> construct(construct( + parenthesized(Parser{}))) || "REDUCTION" >> construct(parenthesized(Parser{})) || "RELAXED" >> construct(construct()) || diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index a4b0c64011fc3..6be063c1b1bc9 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2058,11 +2058,6 @@ class UnparseVisitor { }, x.u); } - bool Pre(const OmpProcBindClause &) { - Word("PROC_BIND("); - return true; - } - void Post(const OmpProcBindClause &) { Put(")"); } void Unparse(const OmpDefaultmapClause &x) { Word("DEFAULTMAP("); Walk(std::get(x.t)); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 6ed7106bb9f47..481099b349668 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -428,6 +428,7 @@ CHECK_SIMPLE_CLAUSE(SeqCst, OMPC_seq_cst) CHECK_SIMPLE_CLAUSE(Release, OMPC_release) CHECK_SIMPLE_CLAUSE(Relaxed, OMPC_relaxed) CHECK_SIMPLE_CLAUSE(Hint, OMPC_hint) +CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind) CHECK_REQ_SCALAR_INT_CLAUSE(Allocator, OMPC_allocator) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) @@ -493,7 +494,6 @@ void OmpStructureChecker::CheckIsVarPartOfAnotherVar( // Following clauses have a seperate node in parse-tree.h. 
CHECK_SIMPLE_PARSER_CLAUSE(OmpDistScheduleClause, OMPC_dist_schedule) CHECK_SIMPLE_PARSER_CLAUSE(OmpNowait, OMPC_nowait) -CHECK_SIMPLE_PARSER_CLAUSE(OmpProcBindClause, OMPC_proc_bind) CHECK_SIMPLE_PARSER_CLAUSE(OmpReductionClause, OMPC_reduction) // Atomic-clause CHECK_SIMPLE_PARSER_CLAUSE(OmpAtomicRead, OMPC_read) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index dcc2deeb348c4..89fc3d9faa21b 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -150,6 +150,7 @@ class OmpStructureChecker void Enter(const parser::OmpClause::Ordered &); void Enter(const parser::OmpClause::Priority &); void Enter(const parser::OmpClause::Private &); + void Enter(const parser::OmpClause::ProcBind &); void Enter(const parser::OmpClause::Safelen &); void Enter(const parser::OmpClause::Shared &); void Enter(const parser::OmpClause::Simdlen &); @@ -182,7 +183,6 @@ class OmpStructureChecker void Enter(const parser::OmpIfClause &); void Enter(const parser::OmpLinearClause &); void Enter(const parser::OmpMapClause &); - void Enter(const parser::OmpProcBindClause &); void Enter(const parser::OmpReductionClause &); void Enter(const parser::OmpScheduleClause &); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index f06990068b40f..28b978975ba06 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -107,7 +107,7 @@ def OMP_PROC_BIND_default : ClauseVal<"default",5,0> {} def OMP_PROC_BIND_unknown : ClauseVal<"unknown",6,0> { let isDefault = true; } def OMPC_ProcBind : Clause<"proc_bind"> { let clangClass = "OMPProcBindClause"; - let flangClass = "OmpProcBindClause"; + let flangClassValue = "OmpProcBindClause"; let enumClauseValue = "ProcBindKind"; let allowedClauseValues = [ OMP_PROC_BIND_master, From a56280094e08792516b035390a946ea337a27b97 Mon Sep 17 00:00:00 2001 From: Gil Rapaport Date: Tue, 15 Dec 2020 22:47:12 +0200 Subject: [PATCH 068/378] [LV] Avoid needless fold tail When the trip-count is provably divisible by the maximal/chosen VF, folding the loop's tail during vectorization is redundant. This commit extends the existing test for constant trip-counts to any trip-count known to be divisible by maximal/selected VF by SCEV. Differential Revision: https://reviews.llvm.org/D93615 --- .../Transforms/Vectorize/LoopVectorize.cpp | 13 ++++ .../dont-fold-tail-for-const-TC.ll | 69 +++++++++++++++++++ .../dont-fold-tail-for-divisible-TC.ll | 62 +++++++++++++++++ 3 files changed, 144 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll create mode 100644 llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 25deab6d2b359..680106bcb7c6c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5512,6 +5512,19 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return MaxVF; } + // Avoid tail folding if the trip count is known to be a multiple of any VF we + // chose. 
+ ScalarEvolution *SE = PSE.getSE(); + const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); + const SCEV *ExitCount = SE->getAddExpr( + BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); + unsigned TCisMultipleOf = 1 << SE->GetMinTrailingZeros(ExitCount); + if (TCisMultipleOf % MaxVFtimesIC == 0) { + // Accept MaxVF if we do not have a tail. + LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); + return MaxVF; + } + // If we don't know the precise trip count, or if the trip count that we // found modulo the vectorization factor is not zero, try to fold the tail // by masking. diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll new file mode 100644 index 0000000000000..0dba201e73fb5 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-interleave=3 -force-vector-width=2 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Make sure the loop is vectorized and unrolled under -Os without folding its +; tail based on its trip-count being provably divisible by chosen VFxIC. + +define dso_local void @constTC(i32* noalias nocapture %A) optsize { +; CHECK-LABEL: @constTC( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDUCTION1:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDUCTION2:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> , <2 x i32>* [[TMP7]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> , <2 x i32>* [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> , <2 x i32>* [[TMP11]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 6 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1800 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 1800, 1800 +; CHECK-NEXT: br i1 [[CMP_N]], 
label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1800, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] +; CHECK-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 1800 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv + store i32 13, i32* %arrayidx, align 1 + %rivPlus1 = add nuw nsw i32 %riv, 1 + %cond = icmp eq i32 %rivPlus1, 1800 + br i1 %cond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll new file mode 100644 index 0000000000000..82819a5db0772 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Make sure the loop is vectorized under -Os without folding its tail based on +; its trip-count's lower bits known to be zero. + +define dso_local void @alignTC(i32* noalias nocapture %A, i32 %n) optsize { +; CHECK-LABEL: @alignTC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALIGNEDTC:%.*]] = and i32 [[N:%.*]], -8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[ALIGNEDTC]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[ALIGNEDTC]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[ALIGNEDTC]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP3]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[ALIGNEDTC]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; 
CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] +; CHECK-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[ALIGNEDTC]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %alignedTC = and i32 %n, -8 + br label %loop + +loop: + %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv + store i32 13, i32* %arrayidx, align 1 + %rivPlus1 = add nuw nsw i32 %riv, 1 + %cond = icmp eq i32 %rivPlus1, %alignedTC + br i1 %cond, label %exit, label %loop + +exit: + ret void +} From e282ae57da29a37e143ab6d640f68e794f5cd614 Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Mon, 21 Dec 2020 19:34:12 +0530 Subject: [PATCH 069/378] [Flang][openmp][4/5] Make nowait clause part of OmpClause After discussion in `D93482` we found that the some of the clauses were not following the common OmpClause convention. The benefits of using OmpClause: - Functionalities from structure checker are mostly aligned to work with `llvm::omp::Clause`. - The unparsing as well can take advantage. - Homogeneity with OpenACC and rest of the clauses in OpenMP. - Could even generate the parser with TableGen, when there is homogeneity. - It becomes confusing when to use `flangClass` and `flangClassValue` inside TableGen, if incase we generate parser using TableGen we could have only a single `let expression`. This patch makes `OmpNoWait` clause part of `OmpClause`. Reviewed By: clementval, kiranktp Differential Revision: https://reviews.llvm.org/D93643 --- flang/include/flang/Parser/dump-parse-tree.h | 1 - flang/include/flang/Parser/parse-tree.h | 3 --- flang/lib/Parser/openmp-parsers.cpp | 2 +- flang/lib/Parser/unparse.cpp | 1 - flang/lib/Semantics/check-omp-structure.cpp | 2 +- flang/lib/Semantics/check-omp-structure.h | 2 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 - 7 files changed, 3 insertions(+), 9 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index f69dd149e0a3a..05152f8c49c68 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -508,7 +508,6 @@ class ParseTreeDumper { "llvm::omp::Clause = ", llvm::omp::getOpenMPClauseName(x)) .str(); } - NODE(parser, OmpNowait) NODE(parser, OmpObject) NODE(parser, OmpObjectList) NODE(parser, OmpProcBindClause) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 59fa278e00296..09c61477d2e76 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3456,9 +3456,6 @@ struct OmpDependClause { std::variant u; }; -// 2.7.1 nowait-clause -> NOWAIT -EMPTY_CLASS(OmpNowait); - // dist_schedule clause does not fit in generic clause class for tablegen. // Therefore it is declared separatly here. 
WRAPPER_CLASS(OmpDistScheduleClause, std::optional); diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 50999bef8f527..62dd0d1e7d29b 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -203,7 +203,7 @@ TYPE_PARSER( "NOGROUP" >> construct(construct()) || "NOTINBRANCH" >> construct(construct()) || - "NOWAIT" >> construct(construct()) || + "NOWAIT" >> construct(construct()) || "NUM_TASKS" >> construct(construct( parenthesized(scalarIntExpr))) || "NUM_TEAMS" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 6be063c1b1bc9..5dbf9940e26e1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2065,7 +2065,6 @@ class UnparseVisitor { std::get>(x.t)); Word(")"); } - void Unparse(const OmpNowait &) { Word("NOWAIT"); } void Unparse(const OmpDistScheduleClause &x) { Word("DIST_SCHEDULE(STATIC"); Walk(", ", x.v); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 481099b349668..c901630c098bc 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -418,6 +418,7 @@ CHECK_SIMPLE_CLAUSE(Link, OMPC_link) CHECK_SIMPLE_CLAUSE(Mergeable, OMPC_mergeable) CHECK_SIMPLE_CLAUSE(Nogroup, OMPC_nogroup) CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) +CHECK_SIMPLE_CLAUSE(Nowait, OMPC_nowait) CHECK_SIMPLE_CLAUSE(To, OMPC_to) CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform) CHECK_SIMPLE_CLAUSE(Untied, OMPC_untied) @@ -493,7 +494,6 @@ void OmpStructureChecker::CheckIsVarPartOfAnotherVar( } // Following clauses have a seperate node in parse-tree.h. CHECK_SIMPLE_PARSER_CLAUSE(OmpDistScheduleClause, OMPC_dist_schedule) -CHECK_SIMPLE_PARSER_CLAUSE(OmpNowait, OMPC_nowait) CHECK_SIMPLE_PARSER_CLAUSE(OmpReductionClause, OMPC_reduction) // Atomic-clause CHECK_SIMPLE_PARSER_CLAUSE(OmpAtomicRead, OMPC_read) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 89fc3d9faa21b..72bb9a523366a 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -126,12 +126,12 @@ class OmpStructureChecker void Leave(const parser::OmpClauseList &); void Enter(const parser::OmpClause &); - void Enter(const parser::OmpNowait &); void Enter(const parser::OmpClause::Allocate &); void Enter(const parser::OmpClause::Allocator &); void Enter(const parser::OmpClause::Inbranch &); void Enter(const parser::OmpClause::Mergeable &); void Enter(const parser::OmpClause::Nogroup &); + void Enter(const parser::OmpClause::Nowait &); void Enter(const parser::OmpClause::Notinbranch &); void Enter(const parser::OmpClause::Untied &); void Enter(const parser::OmpClause::Collapse &); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 28b978975ba06..5c8895b5650e2 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -147,7 +147,6 @@ def OMPC_Ordered : Clause<"ordered"> { } def OMPC_NoWait : Clause<"nowait"> { let clangClass = "OMPNowaitClause"; - let flangClass = "OmpNowait"; } def OMPC_Untied : Clause<"untied"> { let clangClass = "OMPUntiedClause"; } def OMPC_Mergeable : Clause<"mergeable"> { From 2f5569f6f67a30f7774f7c2d2f3d726752a862ae Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 21 Dec 2020 12:16:30 +0100 Subject: [PATCH 070/378] [mlir] remove deprecated string-based OpBuilder from ODS It has 
been deprecated with a warning for two months, removing. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D93623 --- mlir/include/mlir/IR/OpBase.td | 19 ++------- mlir/test/mlir-tblgen/op-decl.td | 4 +- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 44 ++++----------------- 3 files changed, 12 insertions(+), 55 deletions(-) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 0f060b2b1a0a5..0ae572c38f499 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1939,15 +1939,6 @@ def region; // Marker used to identify the successor list for an op. def successor; -// Base class for custom builders. This is a transient class that will go away -// when the transition to the DAG form of builder declaration is complete. -// Should not be used directly. -class OpBuilderBase { - string params = ?; - dag dagParams = dp; - code body = b; -} - // Class for defining a custom builder. // // TableGen generates several generic builders for each op by default (see @@ -1986,11 +1977,9 @@ class OpBuilderBase { // If an empty string is passed in for `body`, then *only* the builder // declaration will be generated; this provides a way to define complicated // builders entirely in C++. -class OpBuilderDAG : OpBuilderBase; - -// Deprecated version of OpBuilder that takes the builder signature as string. -class OpBuilder : OpBuilderBase<(ins), b> { - let params = p; +class OpBuilderDAG { + dag dagParams = p; + code body = b; } // A base decorator class that may optionally be added to OpVariables. @@ -2068,7 +2057,7 @@ class Op props = []> { // ValueRange operands, // ArrayRef attributes); // ``` - list builders = ?; + list builders = ?; // Avoid generating default build functions. Custom builders must be // provided. 
diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index 29438f1836a77..13daca67c4759 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -34,8 +34,7 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { VariadicRegion:$someRegions ); let builders = [OpBuilderDAG<(ins "Value":$val)>, - OpBuilderDAG<(ins CArg<"int", "0">:$integer)>, - OpBuilder<"double deprecatedForm">]; + OpBuilderDAG<(ins CArg<"int", "0">:$integer)>]; let parser = [{ foo }]; let printer = [{ bar }]; let verifier = [{ baz }]; @@ -84,7 +83,6 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // CHECK: ::llvm::Optional< ::llvm::APFloat > attr2(); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, Value val); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, int integer = 0); -// CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, double deprecatedForm); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type r, ::mlir::TypeRange s, ::mlir::Value a, ::mlir::ValueRange b, ::mlir::IntegerAttr attr1, /*optional*/::mlir::FloatAttr attr2, unsigned someRegionsCount) // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type r, ::mlir::TypeRange s, ::mlir::Value a, ::mlir::ValueRange b, uint32_t attr1, /*optional*/::mlir::FloatAttr attr2, unsigned someRegionsCount) // CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes, unsigned numRegions) diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 1c8cbfb9db383..40e1c355daf87 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -1304,8 +1304,7 @@ void OpEmitter::genUseAttrAsResultTypeBuilder() { /// Updates the context `fctx` to enable replacement of $_builder and $_state /// in the body. Reports errors at `loc`. static std::string builderSignatureFromDAG(const DagInit *init, - ArrayRef loc, - FmtContext &fctx) { + ArrayRef loc) { auto *defInit = dyn_cast(init->getOperator()); if (!defInit || !defInit->getDef()->getName().equals("ins")) PrintFatalError(loc, "expected 'ins' in builders"); @@ -1351,31 +1350,9 @@ static std::string builderSignatureFromDAG(const DagInit *init, llvm::formatv("{0} {1}{2}", type, name, defaultValue).str()); } - fctx.withBuilder(builder); - fctx.addSubst("_state", builderOpState); - return llvm::join(arguments, ", "); } -// Returns a signature fo the builder as defined by a string initializer, -// optionally injecting the builder and state arguments. -// TODO: to be removed after the transition is complete. -static std::string builderSignatureFromString(StringRef params, - FmtContext &fctx) { - bool skipParamGen = params.startswith("OpBuilder") || - params.startswith("mlir::OpBuilder") || - params.startswith("::mlir::OpBuilder"); - if (skipParamGen) - return params.str(); - - fctx.withBuilder(builder); - fctx.addSubst("_state", builderOpState); - return std::string(llvm::formatv("::mlir::OpBuilder &{0}, " - "::mlir::OperationState &{1}{2}{3}", - builder, builderOpState, - params.empty() ? "" : ", ", params)); -} - void OpEmitter::genBuilder() { // Handle custom builders if provided. 
// TODO: Create wrapper class for OpBuilder to hide the native @@ -1385,19 +1362,8 @@ void OpEmitter::genBuilder() { if (listInit) { for (Init *init : listInit->getValues()) { Record *builderDef = cast(init)->getDef(); - llvm::Optional params = - builderDef->getValueAsOptionalString("params"); - FmtContext fctx; - if (params.hasValue()) { - PrintWarning(op.getLoc(), - "Op uses a deprecated, string-based OpBuilder format; " - "use OpBuilderDAG with '(ins <...>)' instead"); - } - std::string paramStr = - params.hasValue() ? builderSignatureFromString(params->trim(), fctx) - : builderSignatureFromDAG( - builderDef->getValueAsDag("dagParams"), - op.getLoc(), fctx); + std::string paramStr = builderSignatureFromDAG( + builderDef->getValueAsDag("dagParams"), op.getLoc()); StringRef body = builderDef->getValueAsString("body"); bool hasBody = !body.empty(); @@ -1405,6 +1371,10 @@ void OpEmitter::genBuilder() { hasBody ? OpMethod::MP_Static : OpMethod::MP_StaticDeclaration; auto *method = opClass.addMethodAndPrune("void", "build", properties, paramStr); + + FmtContext fctx; + fctx.withBuilder(builder); + fctx.addSubst("_state", builderOpState); if (hasBody) method->body() << tgfmt(body, &fctx); } From 1a6f43991ff7e5249f24660074f0dd784aeecd5f Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Mon, 21 Dec 2020 22:44:22 +0530 Subject: [PATCH 071/378] [Flang][openmp][5/5] Make dist_schedule clause part of OmpClause After discussion in `D93482` we found that the some of the clauses were not following the common OmpClause convention. The benefits of using OmpClause: - Functionalities from structure checker are mostly aligned to work with `llvm::omp::Clause`. - The unparsing as well can take advantage. - Homogeneity with OpenACC and rest of the clauses in OpenMP. - Could even generate the parser with TableGen, when there is homogeneity. - It becomes confusing when to use `flangClass` and `flangClassValue` inside TableGen, if incase we generate parser using TableGen we could have only a single `let expression`. This patch makes `OmpDistScheduleClause` clause part of `OmpClause`. The unparse function for `OmpDistScheduleClause` is adapted since the keyword and parenthesis are issued by the corresponding unparse function for `parser::OmpClause::DistSchedule`. 
Reviewed By: clementval, kiranktp Differential Revision: https://reviews.llvm.org/D93644 --- flang/include/flang/Parser/dump-parse-tree.h | 1 - flang/include/flang/Parser/parse-tree.h | 4 ---- flang/lib/Parser/openmp-parsers.cpp | 2 +- flang/lib/Parser/unparse.cpp | 5 ----- flang/lib/Semantics/check-omp-structure.cpp | 2 +- flang/lib/Semantics/check-omp-structure.h | 2 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 - 7 files changed, 3 insertions(+), 14 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 05152f8c49c68..60e00002a2df2 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -485,7 +485,6 @@ class ParseTreeDumper { NODE_ENUM(OmpDependenceType, Type) NODE(parser, OmpDependSinkVec) NODE(parser, OmpDependSinkVecLength) - NODE(parser, OmpDistScheduleClause) NODE(parser, OmpEndAtomic) NODE(parser, OmpEndBlockDirective) NODE(parser, OmpEndCriticalDirective) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 09c61477d2e76..7e258b668576e 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3456,10 +3456,6 @@ struct OmpDependClause { std::variant u; }; -// dist_schedule clause does not fit in generic clause class for tablegen. -// Therefore it is declared separatly here. -WRAPPER_CLASS(OmpDistScheduleClause, std::optional); - // OpenMP Clauses struct OmpClause { UNION_CLASS_BOILERPLATE(OmpClause); diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 62dd0d1e7d29b..1386b2b16a788 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -176,7 +176,7 @@ TYPE_PARSER( "DEVICE" >> construct(construct( parenthesized(scalarIntExpr))) || "DIST_SCHEDULE" >> - construct(construct( + construct(construct( parenthesized("STATIC" >> maybe("," >> scalarIntExpr)))) || "FINAL" >> construct(construct( parenthesized(scalarLogicalExpr))) || diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 5dbf9940e26e1..fdb694f3d26f5 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2065,11 +2065,6 @@ class UnparseVisitor { std::get>(x.t)); Word(")"); } - void Unparse(const OmpDistScheduleClause &x) { - Word("DIST_SCHEDULE(STATIC"); - Walk(", ", x.v); - Put(")"); - } #define GEN_FLANG_CLAUSE_UNPARSE #include "llvm/Frontend/OpenMP/OMP.inc" void Unparse(const OmpLoopDirective &x) { diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index c901630c098bc..e2c8333ce7ee4 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -430,6 +430,7 @@ CHECK_SIMPLE_CLAUSE(Release, OMPC_release) CHECK_SIMPLE_CLAUSE(Relaxed, OMPC_relaxed) CHECK_SIMPLE_CLAUSE(Hint, OMPC_hint) CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind) +CHECK_SIMPLE_CLAUSE(DistSchedule, OMPC_dist_schedule) CHECK_REQ_SCALAR_INT_CLAUSE(Allocator, OMPC_allocator) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) @@ -493,7 +494,6 @@ void OmpStructureChecker::CheckIsVarPartOfAnotherVar( } } // Following clauses have a seperate node in parse-tree.h. 
-CHECK_SIMPLE_PARSER_CLAUSE(OmpDistScheduleClause, OMPC_dist_schedule) CHECK_SIMPLE_PARSER_CLAUSE(OmpReductionClause, OMPC_reduction) // Atomic-clause CHECK_SIMPLE_PARSER_CLAUSE(OmpAtomicRead, OMPC_read) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 72bb9a523366a..a966eaf8c4a7d 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -139,6 +139,7 @@ class OmpStructureChecker void Enter(const parser::OmpClause::Copyprivate &); void Enter(const parser::OmpClause::Default &); void Enter(const parser::OmpClause::Device &); + void Enter(const parser::OmpClause::DistSchedule &); void Enter(const parser::OmpClause::Final &); void Enter(const parser::OmpClause::Firstprivate &); void Enter(const parser::OmpClause::From &); @@ -179,7 +180,6 @@ class OmpStructureChecker void Enter(const parser::OmpAlignedClause &); void Enter(const parser::OmpDefaultmapClause &); void Enter(const parser::OmpDependClause &); - void Enter(const parser::OmpDistScheduleClause &); void Enter(const parser::OmpIfClause &); void Enter(const parser::OmpLinearClause &); void Enter(const parser::OmpMapClause &); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 5c8895b5650e2..fa67a64fa9970 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -204,7 +204,6 @@ def OMPC_Hint : Clause<"hint"> { } def OMPC_DistSchedule : Clause<"dist_schedule"> { let clangClass = "OMPDistScheduleClause"; - let flangClass = "OmpDistScheduleClause"; let flangClassValue = "ScalarIntExpr"; let isValueOptional = true; } From a6783cd7b65d6c739c4dc20ec5191b2e862a4237 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 15 Dec 2020 15:49:26 +0000 Subject: [PATCH 072/378] [llvm][ARM/AArch64] Convert Target Parser CPU tests to fixtures Also convert the test function to use EXPECT_EQ and remove the special case for the AEK_NONE extension. This means that each test is marked as failing separatley and the accumultated EXPECT failures are printed next to that test, with its parameters. Before they would be hidden by the "pass &=" pattern and failures would print in one block since it was a "single" test. Example of the new failure messages: ``` ARMCPUTestsPart1/ARMCPUTestFixture.ARMCPUTests/6 [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. 
[----------] 1 test from ARMCPUTestsPart1/ARMCPUTestFixture [ RUN ] ARMCPUTestsPart1/ARMCPUTestFixture.ARMCPUTests/6 /work/open_source/nightly-llvm/llvm-project/llvm/unittests/Support/TargetParserTest.cpp:66: Failure Expected: params.ExpectedFlags Which is: 3405705229 To be equal to: default_extensions Which is: 1 [ FAILED ] ARMCPUTestsPart1/ARMCPUTestFixture.ARMCPUTests/6, where GetParam() = "arm8", "armv4", "none", 0xcafef00d, "4" (0 ms) ``` Reviewed By: MarkMurrayARM Differential Revision: https://reviews.llvm.org/D93392 --- llvm/unittests/Support/TargetParserTest.cpp | 1067 ++++++++++--------- 1 file changed, 552 insertions(+), 515 deletions(-) diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 8407397e4ed87..5208f6a75b022 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -31,308 +31,315 @@ const char *ARMArch[] = { "armv8m.main", "iwmmxt", "iwmmxt2", "xscale", "armv8.1-m.main", }; -bool testARMCPU(StringRef CPUName, StringRef ExpectedArch, - StringRef ExpectedFPU, uint64_t ExpectedFlags, - StringRef CPUAttr) { - ARM::ArchKind AK = ARM::parseCPUArch(CPUName); - bool pass = ARM::getArchName(AK).equals(ExpectedArch); - unsigned FPUKind = ARM::getDefaultFPU(CPUName, AK); - pass &= ARM::getFPUName(FPUKind).equals(ExpectedFPU); - - uint64_t ExtKind = ARM::getDefaultExtensions(CPUName, AK); - if (ExtKind > 1 && (ExtKind & ARM::AEK_NONE)) - pass &= ((ExtKind ^ ARM::AEK_NONE) == ExpectedFlags); - else - pass &= (ExtKind == ExpectedFlags); - pass &= ARM::getCPUAttr(AK).equals(CPUAttr); - - return pass; +struct ARMCPUTestParams { + ARMCPUTestParams(StringRef CPUName, StringRef ExpectedArch, + StringRef ExpectedFPU, uint64_t ExpectedFlags, + StringRef CPUAttr) + : CPUName(CPUName), ExpectedArch(ExpectedArch), ExpectedFPU(ExpectedFPU), + ExpectedFlags(ExpectedFlags), CPUAttr(CPUAttr) {} + + friend std::ostream &operator<<(std::ostream &os, + const ARMCPUTestParams ¶ms) { + return os << "\"" << params.CPUName.str() << "\", \"" + << params.ExpectedArch.str() << "\", \"" + << params.ExpectedFPU.str() << "\", 0x" << std::hex + << params.ExpectedFlags << ", \"" << params.CPUAttr.str() << "\""; + } + + StringRef CPUName; + StringRef ExpectedArch; + StringRef ExpectedFPU; + uint64_t ExpectedFlags; + StringRef CPUAttr; +}; + +class ARMCPUTestFixture : public ::testing::TestWithParam {}; + +TEST_P(ARMCPUTestFixture, ARMCPUTests) { + auto params = GetParam(); + + ARM::ArchKind AK = ARM::parseCPUArch(params.CPUName); + EXPECT_EQ(params.ExpectedArch, ARM::getArchName(AK)); + + unsigned FPUKind = ARM::getDefaultFPU(params.CPUName, AK); + EXPECT_EQ(params.ExpectedFPU, ARM::getFPUName(FPUKind)); + + uint64_t default_extensions = ARM::getDefaultExtensions(params.CPUName, AK); + EXPECT_EQ(params.ExpectedFlags, default_extensions); + + EXPECT_EQ(params.CPUAttr, ARM::getCPUAttr(AK)); } -TEST(TargetParserTest, testARMCPU) { - EXPECT_TRUE(testARMCPU("invalid", "invalid", "invalid", - ARM::AEK_NONE, "")); - EXPECT_TRUE(testARMCPU("generic", "invalid", "none", - ARM::AEK_NONE, "")); - - EXPECT_TRUE(testARMCPU("arm2", "armv2", "none", - ARM::AEK_NONE, "2")); - EXPECT_TRUE(testARMCPU("arm3", "armv2a", "none", - ARM::AEK_NONE, "2A")); - EXPECT_TRUE(testARMCPU("arm6", "armv3", "none", - ARM::AEK_NONE, "3")); - EXPECT_TRUE(testARMCPU("arm7m", "armv3m", "none", - ARM::AEK_NONE, "3M")); - EXPECT_TRUE(testARMCPU("arm8", "armv4", "none", - ARM::AEK_NONE, "4")); - EXPECT_TRUE(testARMCPU("arm810", "armv4", "none", 
- ARM::AEK_NONE, "4")); - EXPECT_TRUE(testARMCPU("strongarm", "armv4", "none", - ARM::AEK_NONE, "4")); - EXPECT_TRUE(testARMCPU("strongarm110", "armv4", "none", - ARM::AEK_NONE, "4")); - EXPECT_TRUE(testARMCPU("strongarm1100", "armv4", "none", - ARM::AEK_NONE, "4")); - EXPECT_TRUE(testARMCPU("strongarm1110", "armv4", "none", - ARM::AEK_NONE, "4")); - EXPECT_TRUE(testARMCPU("arm7tdmi", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm7tdmi-s", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm710t", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm720t", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm9", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm9tdmi", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm920", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm920t", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm922t", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm9312", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm940t", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("ep9312", "armv4t", "none", - ARM::AEK_NONE, "4T")); - EXPECT_TRUE(testARMCPU("arm10tdmi", "armv5t", "none", - ARM::AEK_NONE, "5T")); - EXPECT_TRUE(testARMCPU("arm1020t", "armv5t", "none", - ARM::AEK_NONE, "5T")); - EXPECT_TRUE(testARMCPU("arm9e", "armv5te", "none", - ARM::AEK_DSP, "5TE")); - EXPECT_TRUE(testARMCPU("arm946e-s", "armv5te", "none", - ARM::AEK_DSP, "5TE")); - EXPECT_TRUE(testARMCPU("arm966e-s", "armv5te", "none", - ARM::AEK_DSP, "5TE")); - EXPECT_TRUE(testARMCPU("arm968e-s", "armv5te", "none", - ARM::AEK_DSP, "5TE")); - EXPECT_TRUE(testARMCPU("arm10e", "armv5te", "none", - ARM::AEK_DSP, "5TE")); - EXPECT_TRUE(testARMCPU("arm1020e", "armv5te", "none", - ARM::AEK_DSP, "5TE")); - EXPECT_TRUE(testARMCPU("arm1022e", "armv5te", "none", - ARM::AEK_DSP, "5TE")); - EXPECT_TRUE(testARMCPU("arm926ej-s", "armv5tej", "none", - ARM::AEK_DSP, "5TEJ")); - EXPECT_TRUE(testARMCPU("arm1136j-s", "armv6", "none", - ARM::AEK_DSP, "6")); - EXPECT_TRUE(testARMCPU("arm1136jf-s", "armv6", "vfpv2", - ARM::AEK_DSP, "6")); - EXPECT_TRUE(testARMCPU("arm1136jz-s", "armv6", "none", - ARM::AEK_DSP, "6")); - EXPECT_TRUE(testARMCPU("arm1176jz-s", "armv6kz", "none", - ARM::AEK_SEC | ARM::AEK_DSP, "6KZ")); - EXPECT_TRUE(testARMCPU("mpcore", "armv6k", "vfpv2", - ARM::AEK_DSP, "6K")); - EXPECT_TRUE(testARMCPU("mpcorenovfp", "armv6k", "none", - ARM::AEK_DSP, "6K")); - EXPECT_TRUE(testARMCPU("arm1176jzf-s", "armv6kz", "vfpv2", - ARM::AEK_SEC | ARM::AEK_DSP, "6KZ")); - EXPECT_TRUE(testARMCPU("arm1156t2-s", "armv6t2", "none", - ARM::AEK_DSP, "6T2")); - EXPECT_TRUE(testARMCPU("arm1156t2f-s", "armv6t2", "vfpv2", - ARM::AEK_DSP, "6T2")); - EXPECT_TRUE(testARMCPU("cortex-m0", "armv6-m", "none", - ARM::AEK_NONE, "6-M")); - EXPECT_TRUE(testARMCPU("cortex-m0plus", "armv6-m", "none", - ARM::AEK_NONE, "6-M")); - EXPECT_TRUE(testARMCPU("cortex-m1", "armv6-m", "none", - ARM::AEK_NONE, "6-M")); - EXPECT_TRUE(testARMCPU("sc000", "armv6-m", "none", - ARM::AEK_NONE, "6-M")); - EXPECT_TRUE(testARMCPU("cortex-a5", "armv7-a", "neon-vfpv4", - ARM::AEK_MP | ARM::AEK_SEC | ARM::AEK_DSP, "7-A")); - EXPECT_TRUE(testARMCPU("cortex-a7", "armv7-a", "neon-vfpv4", +// Note that we include ARM::AEK_NONE even when there are other extensions +// we expect. 
This is because the default extensions for a CPU are the sum +// of the default extensions for its architecture and for the CPU. +// So if a CPU has no extra extensions, it adds AEK_NONE. +INSTANTIATE_TEST_CASE_P( + ARMCPUTestsPart1, ARMCPUTestFixture, + ::testing::Values( + ARMCPUTestParams("invalid", "invalid", "invalid", ARM::AEK_NONE, ""), + ARMCPUTestParams("generic", "invalid", "none", ARM::AEK_NONE, ""), + + ARMCPUTestParams("arm2", "armv2", "none", ARM::AEK_NONE, "2"), + ARMCPUTestParams("arm3", "armv2a", "none", ARM::AEK_NONE, "2A"), + ARMCPUTestParams("arm6", "armv3", "none", ARM::AEK_NONE, "3"), + ARMCPUTestParams("arm7m", "armv3m", "none", ARM::AEK_NONE, "3M"), + ARMCPUTestParams("arm8", "armv4", "none", ARM::AEK_NONE, "4"), + ARMCPUTestParams("arm810", "armv4", "none", ARM::AEK_NONE, "4"), + ARMCPUTestParams("strongarm", "armv4", "none", ARM::AEK_NONE, "4"), + ARMCPUTestParams("strongarm110", "armv4", "none", ARM::AEK_NONE, "4"), + ARMCPUTestParams("strongarm1100", "armv4", "none", ARM::AEK_NONE, "4"), + ARMCPUTestParams("strongarm1110", "armv4", "none", ARM::AEK_NONE, "4"), + ARMCPUTestParams("arm7tdmi", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm7tdmi-s", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm710t", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm720t", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm9", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm9tdmi", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm920", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm920t", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm922t", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm9312", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm940t", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("ep9312", "armv4t", "none", ARM::AEK_NONE, "4T"), + ARMCPUTestParams("arm10tdmi", "armv5t", "none", ARM::AEK_NONE, "5T"), + ARMCPUTestParams("arm1020t", "armv5t", "none", ARM::AEK_NONE, "5T"), + ARMCPUTestParams("arm9e", "armv5te", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TE"), + ARMCPUTestParams("arm946e-s", "armv5te", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TE"), + ARMCPUTestParams("arm966e-s", "armv5te", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TE"), + ARMCPUTestParams("arm968e-s", "armv5te", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TE"), + ARMCPUTestParams("arm10e", "armv5te", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TE"), + ARMCPUTestParams("arm1020e", "armv5te", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TE"), + ARMCPUTestParams("arm1022e", "armv5te", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TE"), + ARMCPUTestParams("arm926ej-s", "armv5tej", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "5TEJ"), + ARMCPUTestParams("arm1136j-s", "armv6", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "6"), + ARMCPUTestParams("arm1136jf-s", "armv6", "vfpv2", + ARM::AEK_NONE | ARM::AEK_DSP, "6"), + ARMCPUTestParams("arm1136jz-s", "armv6", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "6"), + ARMCPUTestParams("arm1176jz-s", "armv6kz", "none", + ARM::AEK_NONE | ARM::AEK_SEC | ARM::AEK_DSP, "6KZ"), + ARMCPUTestParams("mpcore", "armv6k", "vfpv2", + ARM::AEK_NONE | ARM::AEK_DSP, "6K"), + ARMCPUTestParams("mpcorenovfp", "armv6k", "none", + ARM::AEK_NONE | ARM::AEK_DSP, "6K"), + ARMCPUTestParams("arm1176jzf-s", "armv6kz", "vfpv2", + ARM::AEK_NONE | ARM::AEK_SEC | ARM::AEK_DSP, "6KZ"), + ARMCPUTestParams("arm1156t2-s", "armv6t2", "none", + ARM::AEK_NONE | 
ARM::AEK_DSP, "6T2"), + ARMCPUTestParams("arm1156t2f-s", "armv6t2", "vfpv2", + ARM::AEK_NONE | ARM::AEK_DSP, "6T2"), + ARMCPUTestParams("cortex-m0", "armv6-m", "none", ARM::AEK_NONE, "6-M"), + ARMCPUTestParams("cortex-m0plus", "armv6-m", "none", ARM::AEK_NONE, + "6-M"), + ARMCPUTestParams("cortex-m1", "armv6-m", "none", ARM::AEK_NONE, "6-M"), + ARMCPUTestParams("sc000", "armv6-m", "none", ARM::AEK_NONE, "6-M"), + ARMCPUTestParams("cortex-a5", "armv7-a", "neon-vfpv4", + ARM::AEK_MP | ARM::AEK_SEC | ARM::AEK_DSP, "7-A"), + ARMCPUTestParams("cortex-a7", "armv7-a", "neon-vfpv4", ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM | ARM::AEK_MP | ARM::AEK_SEC | ARM::AEK_VIRT | ARM::AEK_DSP, - "7-A")); - EXPECT_TRUE(testARMCPU("cortex-a8", "armv7-a", "neon", - ARM::AEK_SEC | ARM::AEK_DSP, "7-A")); - EXPECT_TRUE(testARMCPU("cortex-a9", "armv7-a", "neon-fp16", - ARM::AEK_MP | ARM::AEK_SEC | ARM::AEK_DSP, "7-A")); - EXPECT_TRUE(testARMCPU("cortex-a12", "armv7-a", "neon-vfpv4", + "7-A"), + ARMCPUTestParams("cortex-a8", "armv7-a", "neon", + ARM::AEK_SEC | ARM::AEK_DSP, "7-A")), ); + +// gtest in llvm has a limit of 50 test cases when using ::Values so we split +// them into 2 blocks +INSTANTIATE_TEST_CASE_P( + ARMCPUTestsPart2, ARMCPUTestFixture, + ::testing::Values( + ARMCPUTestParams("cortex-a9", "armv7-a", "neon-fp16", + ARM::AEK_MP | ARM::AEK_SEC | ARM::AEK_DSP, "7-A"), + ARMCPUTestParams("cortex-a12", "armv7-a", "neon-vfpv4", ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-A")); - EXPECT_TRUE(testARMCPU("cortex-a15", "armv7-a", "neon-vfpv4", + "7-A"), + ARMCPUTestParams("cortex-a15", "armv7-a", "neon-vfpv4", ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-A")); - EXPECT_TRUE(testARMCPU("cortex-a17", "armv7-a", "neon-vfpv4", + "7-A"), + ARMCPUTestParams("cortex-a17", "armv7-a", "neon-vfpv4", ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-A")); - EXPECT_TRUE(testARMCPU("krait", "armv7-a", "neon-vfpv4", + "7-A"), + ARMCPUTestParams("krait", "armv7-a", "neon-vfpv4", ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-A")); - EXPECT_TRUE(testARMCPU("cortex-r4", "armv7-r", "none", - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "7-R")); - EXPECT_TRUE(testARMCPU("cortex-r4f", "armv7-r", "vfpv3-d16", - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "7-R")); - EXPECT_TRUE(testARMCPU("cortex-r5", "armv7-r", "vfpv3-d16", + "7-A"), + ARMCPUTestParams("cortex-r4", "armv7-r", "none", + ARM::AEK_NONE | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, + "7-R"), + ARMCPUTestParams("cortex-r4f", "armv7-r", "vfpv3-d16", + ARM::AEK_NONE | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, + "7-R"), + ARMCPUTestParams("cortex-r5", "armv7-r", "vfpv3-d16", ARM::AEK_MP | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-R")); - EXPECT_TRUE(testARMCPU("cortex-r7", "armv7-r", "vfpv3-d16-fp16", + "7-R"), + ARMCPUTestParams("cortex-r7", "armv7-r", "vfpv3-d16-fp16", ARM::AEK_MP | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-R")); - EXPECT_TRUE(testARMCPU("cortex-r8", "armv7-r", "vfpv3-d16-fp16", + "7-R"), + ARMCPUTestParams("cortex-r8", "armv7-r", "vfpv3-d16-fp16", ARM::AEK_MP | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-R")); - EXPECT_TRUE(testARMCPU("cortex-r52", "armv8-r", "neon-fp-armv8", - ARM::AEK_CRC | ARM::AEK_MP | ARM::AEK_VIRT | - ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | - ARM::AEK_DSP, - "8-R")); - EXPECT_TRUE( - testARMCPU("sc300", "armv7-m", 
"none", ARM::AEK_HWDIVTHUMB, "7-M")); - EXPECT_TRUE( - testARMCPU("cortex-m3", "armv7-m", "none", ARM::AEK_HWDIVTHUMB, "7-M")); - EXPECT_TRUE(testARMCPU("cortex-m4", "armv7e-m", "fpv4-sp-d16", - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "7E-M")); - EXPECT_TRUE(testARMCPU("cortex-m7", "armv7e-m", "fpv5-d16", - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "7E-M")); - EXPECT_TRUE(testARMCPU("cortex-a32", "armv8-a", "crypto-neon-fp-armv8", + "7-R"), + ARMCPUTestParams("cortex-r52", "armv8-r", "neon-fp-armv8", + ARM::AEK_NONE | ARM::AEK_CRC | ARM::AEK_MP | + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, + "8-R"), + ARMCPUTestParams("sc300", "armv7-m", "none", + ARM::AEK_NONE | ARM::AEK_HWDIVTHUMB, "7-M"), + ARMCPUTestParams("cortex-m3", "armv7-m", "none", + ARM::AEK_NONE | ARM::AEK_HWDIVTHUMB, "7-M"), + ARMCPUTestParams("cortex-m4", "armv7e-m", "fpv4-sp-d16", + ARM::AEK_NONE | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, + "7E-M"), + ARMCPUTestParams("cortex-m7", "armv7e-m", "fpv5-d16", + ARM::AEK_NONE | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, + "7E-M"), + ARMCPUTestParams("cortex-a32", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("cortex-a35", "armv8-a", "crypto-neon-fp-armv8", + "8-A"), + ARMCPUTestParams("cortex-a35", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("cortex-a53", "armv8-a", "crypto-neon-fp-armv8", + "8-A"), + ARMCPUTestParams("cortex-a53", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("cortex-a55", "armv8.2-a", "crypto-neon-fp-armv8", + "8-A"), + ARMCPUTestParams("cortex-a55", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 | - ARM::AEK_RAS | ARM::AEK_DOTPROD, - "8.2-A")); - EXPECT_TRUE(testARMCPU("cortex-a57", "armv8-a", "crypto-neon-fp-armv8", + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_FP16 | ARM::AEK_RAS | ARM::AEK_DOTPROD, + "8.2-A"), + ARMCPUTestParams("cortex-a57", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("cortex-a72", "armv8-a", "crypto-neon-fp-armv8", + "8-A"), + ARMCPUTestParams("cortex-a72", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("cortex-a73", "armv8-a", "crypto-neon-fp-armv8", + "8-A"), + ARMCPUTestParams("cortex-a73", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("cortex-a75", "armv8.2-a", "crypto-neon-fp-armv8", + "8-A"), + ARMCPUTestParams("cortex-a75", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 | - ARM::AEK_RAS | ARM::AEK_DOTPROD, - "8.2-A")); - EXPECT_TRUE(testARMCPU("cortex-a76", "armv8.2-a", "crypto-neon-fp-armv8", + 
ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_FP16 | ARM::AEK_RAS | ARM::AEK_DOTPROD, + "8.2-A"), + ARMCPUTestParams("cortex-a76", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 | - ARM::AEK_RAS | ARM::AEK_DOTPROD, - "8.2-A")); - EXPECT_TRUE(testARMCPU("cortex-a76ae", "armv8.2-a", "crypto-neon-fp-armv8", + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_FP16 | ARM::AEK_RAS | ARM::AEK_DOTPROD, + "8.2-A"), + ARMCPUTestParams("cortex-a76ae", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 | - ARM::AEK_RAS | ARM::AEK_DOTPROD, - "8.2-A")); - EXPECT_TRUE(testARMCPU("cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8", + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_FP16 | ARM::AEK_RAS | ARM::AEK_DOTPROD, + "8.2-A"), + ARMCPUTestParams("cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 | - ARM::AEK_RAS | ARM::AEK_DOTPROD, - "8.2-A")); - EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8", - ARM::AEK_DOTPROD | ARM::AEK_FP16 | - ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | - ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | - ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS, - "8.2-A")); - EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8", + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_FP16 | ARM::AEK_RAS | ARM::AEK_DOTPROD, + "8.2-A"), + ARMCPUTestParams("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8", + ARM::AEK_DOTPROD | ARM::AEK_FP16 | ARM::AEK_SEC | + ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | + ARM::AEK_RAS, + "8.2-A"), + ARMCPUTestParams("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_DOTPROD | + ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | + ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | + ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS, + "8.2-A"), + ARMCPUTestParams("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8", + ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_FP16 | ARM::AEK_RAS | ARM::AEK_DOTPROD, + "8.2-A"), + ARMCPUTestParams("neoverse-n2", "armv8.5-a", "crypto-neon-fp-armv8", + ARM::AEK_CRC | ARM::AEK_HWDIVTHUMB | + ARM::AEK_HWDIVARM | ARM::AEK_MP | ARM::AEK_SEC | + ARM::AEK_VIRT | ARM::AEK_DSP | ARM::AEK_BF16 | + ARM::AEK_DOTPROD | ARM::AEK_RAS | ARM::AEK_I8MM | + ARM::AEK_SB, + "8.5-A"), + ARMCPUTestParams("neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | - ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | - ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS, - "8.2-A")); - EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8", - ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 | - ARM::AEK_RAS | ARM::AEK_DOTPROD, - "8.2-A")); - EXPECT_TRUE(testARMCPU("neoverse-n2", "armv8.5-a", "crypto-neon-fp-armv8", - ARM::AEK_CRC | ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM | - ARM::AEK_MP | ARM::AEK_SEC | ARM::AEK_VIRT | - 
ARM::AEK_DSP | ARM::AEK_BF16 | ARM::AEK_DOTPROD | - ARM::AEK_RAS | ARM::AEK_I8MM | ARM::AEK_SB, - "8.5-A")); - EXPECT_TRUE(testARMCPU("neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", - ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | - ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | - ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | - ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD, - "8.4-A")); - EXPECT_TRUE(testARMCPU("cyclone", "armv8-a", "crypto-neon-fp-armv8", + ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | + ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD, + "8.4-A"), + ARMCPUTestParams("cyclone", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("exynos-m3", "armv8-a", "crypto-neon-fp-armv8", + "8-A"), + ARMCPUTestParams("exynos-m3", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "8-A")); - EXPECT_TRUE(testARMCPU("exynos-m4", "armv8.2-a", "crypto-neon-fp-armv8", + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, + "8-A"), + ARMCPUTestParams("exynos-m4", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_DOTPROD | - ARM::AEK_FP16 | ARM::AEK_RAS, - "8.2-A")); - EXPECT_TRUE(testARMCPU("exynos-m5", "armv8.2-a", "crypto-neon-fp-armv8", + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_DOTPROD | ARM::AEK_FP16 | ARM::AEK_RAS, + "8.2-A"), + ARMCPUTestParams("exynos-m5", "armv8.2-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | - ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_DOTPROD | - ARM::AEK_FP16 | ARM::AEK_RAS, - "8.2-A")); - EXPECT_TRUE(testARMCPU("cortex-m23", "armv8-m.base", "none", - ARM::AEK_HWDIVTHUMB, "8-M.Baseline")); - EXPECT_TRUE(testARMCPU("cortex-m33", "armv8-m.main", "fpv5-sp-d16", - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline")); - EXPECT_TRUE(testARMCPU("cortex-m35p", "armv8-m.main", "fpv5-sp-d16", - ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline")); - EXPECT_TRUE(testARMCPU("cortex-m55", "armv8.1-m.main", "fp-armv8-fullfp16-d16", + ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | + ARM::AEK_DOTPROD | ARM::AEK_FP16 | ARM::AEK_RAS, + "8.2-A"), + ARMCPUTestParams("cortex-m23", "armv8-m.base", "none", + ARM::AEK_NONE | ARM::AEK_HWDIVTHUMB, "8-M.Baseline"), + ARMCPUTestParams("cortex-m33", "armv8-m.main", "fpv5-sp-d16", + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"), + ARMCPUTestParams("cortex-m35p", "armv8-m.main", "fpv5-sp-d16", + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"), + ARMCPUTestParams("cortex-m55", "armv8.1-m.main", + "fp-armv8-fullfp16-d16", ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_SIMD | - ARM::AEK_FP | ARM::AEK_RAS | ARM::AEK_LOB | - ARM::AEK_FP16, - "8.1-M.Mainline")); - EXPECT_TRUE(testARMCPU("iwmmxt", "iwmmxt", "none", - ARM::AEK_NONE, "iwmmxt")); - EXPECT_TRUE(testARMCPU("xscale", "xscale", "none", - ARM::AEK_NONE, "xscale")); - EXPECT_TRUE(testARMCPU("swift", "armv7s", "neon-vfpv4", + ARM::AEK_FP | ARM::AEK_RAS | ARM::AEK_LOB | + ARM::AEK_FP16, + "8.1-M.Mainline"), + ARMCPUTestParams("iwmmxt", "iwmmxt", "none", ARM::AEK_NONE, "iwmmxt"), + ARMCPUTestParams("xscale", "xscale", "none", 
ARM::AEK_NONE, "xscale"), + ARMCPUTestParams("swift", "armv7s", "neon-vfpv4", ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, - "7-S")); -} + "7-S")), ); static constexpr unsigned NumARMCPUArchs = 91; @@ -798,278 +805,307 @@ TEST(TargetParserTest, ARMparseArchVersion) { EXPECT_EQ(5u, ARM::parseArchVersion(ARMArch[i])); } -bool testAArch64CPU(StringRef CPUName, StringRef ExpectedArch, - StringRef ExpectedFPU, uint64_t ExpectedFlags, - StringRef CPUAttr) { - AArch64::ArchKind AK = AArch64::parseCPUArch(CPUName); - bool pass = AArch64::getArchName(AK).equals(ExpectedArch); +class AArch64CPUTestFixture + : public ::testing::TestWithParam {}; - uint64_t ExtKind = AArch64::getDefaultExtensions(CPUName, AK); - if (ExtKind > 1 && (ExtKind & AArch64::AEK_NONE)) - pass &= ((ExtKind ^ AArch64::AEK_NONE) == ExpectedFlags); - else - pass &= (ExtKind == ExpectedFlags); +TEST_P(AArch64CPUTestFixture, testAArch64CPU) { + ARMCPUTestParams params = GetParam(); - unsigned FPUKind = AArch64::getDefaultFPU(CPUName, AK); - pass &= ARM::getFPUName(FPUKind).equals(ExpectedFPU); + AArch64::ArchKind AK = AArch64::parseCPUArch(params.CPUName); + EXPECT_EQ(params.ExpectedArch, AArch64::getArchName(AK)); - pass &= AArch64::getCPUAttr(AK).equals(CPUAttr); + uint64_t default_extensions = + AArch64::getDefaultExtensions(params.CPUName, AK); + EXPECT_EQ(params.ExpectedFlags, default_extensions); - return pass; -} + unsigned FPUKind = AArch64::getDefaultFPU(params.CPUName, AK); + EXPECT_EQ(params.ExpectedFPU, ARM::getFPUName(FPUKind)); -TEST(TargetParserTest, testAArch64CPU) { - EXPECT_TRUE(testAArch64CPU( - "invalid", "invalid", "invalid", - AArch64::AEK_NONE, "")); - EXPECT_TRUE(testAArch64CPU( - "generic", "invalid", "none", - AArch64::AEK_NONE, "")); - - EXPECT_TRUE(testAArch64CPU( - "cortex-a34", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a35", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a53", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a55", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | - AArch64::AEK_RDM | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC, "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a57", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a65", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | - AArch64::AEK_FP | AArch64::AEK_FP16 | AArch64::AEK_LSE | - AArch64::AEK_RAS | AArch64::AEK_RCPC | AArch64::AEK_RDM | - AArch64::AEK_SIMD | AArch64::AEK_SSBS, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a65ae", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | - AArch64::AEK_FP | AArch64::AEK_FP16 | AArch64::AEK_LSE | - AArch64::AEK_RAS | AArch64::AEK_RCPC | AArch64::AEK_RDM | - AArch64::AEK_SIMD | AArch64::AEK_SSBS, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a72", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - 
AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a73", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a75", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | - AArch64::AEK_RDM | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC, "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a76", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC| AArch64::AEK_SSBS, "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a76ae", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC| AArch64::AEK_SSBS, "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC | AArch64::AEK_SSBS, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", - AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | - AArch64::AEK_RCPC | AArch64::AEK_CRC | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | - AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | - AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_BF16, - "8.4-A")); - EXPECT_TRUE(testAArch64CPU( - "cortex-r82", "armv8-r", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_RDM | AArch64::AEK_SSBS | - AArch64::AEK_DOTPROD | AArch64::AEK_FP | AArch64::AEK_SIMD | - AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_RAS | - AArch64::AEK_RCPC | AArch64::AEK_LSE | AArch64::AEK_SB, - "8-R")); - EXPECT_TRUE(testAArch64CPU( - "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC | AArch64::AEK_SSBS, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "cyclone", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "apple-a7", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "apple-a8", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "apple-a9", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU("apple-a10", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | 
AArch64::AEK_CRYPTO | - AArch64::AEK_FP | AArch64::AEK_RDM | - AArch64::AEK_SIMD, - "8-A")); - EXPECT_TRUE(testAArch64CPU("apple-a11", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | - AArch64::AEK_FP | AArch64::AEK_LSE | - AArch64::AEK_RAS | AArch64::AEK_RDM | - AArch64::AEK_SIMD, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "apple-a12", "armv8.3-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_LSE | AArch64::AEK_RAS | - AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_FP16, - "8.3-A")); - EXPECT_TRUE(testAArch64CPU( - "apple-a13", "armv8.4-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_LSE | AArch64::AEK_RAS | - AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | - AArch64::AEK_FP16 | AArch64::AEK_FP16FML, - "8.4-A")); - EXPECT_TRUE(testAArch64CPU( - "apple-s4", "armv8.3-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_LSE | AArch64::AEK_RAS | - AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_FP16, - "8.3-A")); - EXPECT_TRUE(testAArch64CPU( - "apple-s5", "armv8.3-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_LSE | AArch64::AEK_RAS | - AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_FP16, - "8.3-A")); - EXPECT_TRUE(testAArch64CPU( - "exynos-m3", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "exynos-m4", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | - AArch64::AEK_DOTPROD | AArch64::AEK_FP | AArch64::AEK_FP16 | - AArch64::AEK_LSE | AArch64::AEK_RAS | AArch64::AEK_RDM | - AArch64::AEK_SIMD, "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "exynos-m5", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | - AArch64::AEK_DOTPROD | AArch64::AEK_FP | AArch64::AEK_FP16 | - AArch64::AEK_LSE | AArch64::AEK_RAS | AArch64::AEK_RDM | - AArch64::AEK_SIMD, "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "falkor", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_RDM, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "kryo", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD, "8-A")); - EXPECT_TRUE(testAArch64CPU( - "neoverse-e1", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | - AArch64::AEK_FP | AArch64::AEK_FP16 | AArch64::AEK_LSE | - AArch64::AEK_RAS | AArch64::AEK_RCPC | AArch64::AEK_RDM | - AArch64::AEK_SIMD | AArch64::AEK_SSBS, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | - AArch64::AEK_FP | AArch64::AEK_FP16 | AArch64::AEK_LSE | - AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC | - AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_SSBS, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "neoverse-n2", "armv8.5-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_SVE | AArch64::AEK_DOTPROD | - AArch64::AEK_RCPC | AArch64::AEK_RDM | 
AArch64::AEK_MTE | - AArch64::AEK_SSBS | AArch64::AEK_SB | AArch64::AEK_SVE2 | - AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16 | AArch64::AEK_I8MM, - "8.5-A")); - EXPECT_TRUE(testAArch64CPU( - "thunderx2t99", "armv8.1-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | - AArch64::AEK_RDM | AArch64::AEK_FP | AArch64::AEK_SIMD, "8.1-A")); - EXPECT_TRUE(testAArch64CPU( - "thunderx3t110", "armv8.3-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | - AArch64::AEK_RDM | AArch64::AEK_FP | AArch64::AEK_SIMD | - AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RAND | - AArch64::AEK_RCPC, - "8.3-A")); - EXPECT_TRUE(testAArch64CPU( - "thunderx", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD | - AArch64::AEK_FP | AArch64::AEK_PROFILE, - "8-A")); - EXPECT_TRUE(testAArch64CPU( - "thunderxt81", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD | - AArch64::AEK_FP | AArch64::AEK_PROFILE, - "8-A")); - EXPECT_TRUE(testAArch64CPU( - "thunderxt83", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD | - AArch64::AEK_FP | AArch64::AEK_PROFILE, - "8-A")); - EXPECT_TRUE(testAArch64CPU( - "thunderxt88", "armv8-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD | - AArch64::AEK_FP | AArch64::AEK_PROFILE, - "8-A")); - EXPECT_TRUE(testAArch64CPU( - "tsv110", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | - AArch64::AEK_RDM | AArch64::AEK_PROFILE | AArch64::AEK_FP16 | - AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "a64fx", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_SVE | AArch64::AEK_RDM, - "8.2-A")); - EXPECT_TRUE(testAArch64CPU( - "carmel", "armv8.2-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_RAS | - AArch64::AEK_LSE | AArch64::AEK_RDM, - "8.2-A")); + EXPECT_EQ(params.CPUAttr, AArch64::getCPUAttr(AK)); } +INSTANTIATE_TEST_CASE_P( + AArch64CPUTests, AArch64CPUTestFixture, + ::testing::Values( + ARMCPUTestParams("invalid", "invalid", "invalid", AArch64::AEK_NONE, + ""), + ARMCPUTestParams("generic", "invalid", "none", AArch64::AEK_NONE, ""), + + ARMCPUTestParams("cortex-a34", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("cortex-a35", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("cortex-a53", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("cortex-a55", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_FP16 | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC, + "8.2-A"), + ARMCPUTestParams("cortex-a57", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + 
ARMCPUTestParams("cortex-a65", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_DOTPROD | AArch64::AEK_FP | + AArch64::AEK_FP16 | AArch64::AEK_LSE | + AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_RDM | AArch64::AEK_SIMD | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("cortex-a65ae", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_DOTPROD | AArch64::AEK_FP | + AArch64::AEK_FP16 | AArch64::AEK_LSE | + AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_RDM | AArch64::AEK_SIMD | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("cortex-a72", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("cortex-a73", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("cortex-a75", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_FP16 | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC, + "8.2-A"), + ARMCPUTestParams("cortex-a76", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_RDM | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_FP16 | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("cortex-a76ae", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_RDM | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_FP16 | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_RDM | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_FP16 | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_RDM | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_FP16 | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams( + "neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", + AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | + AArch64::AEK_RCPC | AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_BF16, + "8.4-A"), + ARMCPUTestParams("cortex-r82", "armv8-r", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_RDM | + AArch64::AEK_SSBS | AArch64::AEK_DOTPROD | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_FP16 | AArch64::AEK_FP16FML | + AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_LSE | AArch64::AEK_SB, + "8-R"), + ARMCPUTestParams("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_RDM | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_FP16 | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("cyclone", "armv8-a", 
"crypto-neon-fp-armv8", + AArch64::AEK_NONE | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("apple-a7", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_NONE | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("apple-a8", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_NONE | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("apple-a9", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_NONE | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("apple-a10", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_RDM | + AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("apple-a11", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_NONE | AArch64::AEK_CRC | + AArch64::AEK_CRYPTO | AArch64::AEK_FP | + AArch64::AEK_LSE | AArch64::AEK_RAS | + AArch64::AEK_RDM | AArch64::AEK_SIMD, + "8.2-A"), + ARMCPUTestParams("apple-a12", "armv8.3-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_LSE | AArch64::AEK_RAS | + AArch64::AEK_RDM | AArch64::AEK_RCPC | + AArch64::AEK_FP16, + "8.3-A"), + ARMCPUTestParams("apple-a13", "armv8.4-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_LSE | AArch64::AEK_RAS | + AArch64::AEK_RDM | AArch64::AEK_RCPC | + AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | + AArch64::AEK_FP16FML, + "8.4-A"), + ARMCPUTestParams("apple-s4", "armv8.3-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_LSE | AArch64::AEK_RAS | + AArch64::AEK_RDM | AArch64::AEK_RCPC | + AArch64::AEK_FP16, + "8.3-A"), + ARMCPUTestParams("apple-s5", "armv8.3-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_LSE | AArch64::AEK_RAS | + AArch64::AEK_RDM | AArch64::AEK_RCPC | + AArch64::AEK_FP16, + "8.3-A"), + ARMCPUTestParams("exynos-m3", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("exynos-m4", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_DOTPROD | AArch64::AEK_FP | + AArch64::AEK_FP16 | AArch64::AEK_LSE | + AArch64::AEK_RAS | AArch64::AEK_RDM | + AArch64::AEK_SIMD, + "8.2-A"), + ARMCPUTestParams("exynos-m5", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_DOTPROD | AArch64::AEK_FP | + AArch64::AEK_FP16 | AArch64::AEK_LSE | + AArch64::AEK_RAS | AArch64::AEK_RDM | + AArch64::AEK_SIMD, + "8.2-A"), + ARMCPUTestParams("falkor", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_RDM, + "8-A"), + ARMCPUTestParams("kryo", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD, + "8-A"), + ARMCPUTestParams("neoverse-e1", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_DOTPROD | AArch64::AEK_FP | + AArch64::AEK_FP16 | AArch64::AEK_LSE | + AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_RDM | AArch64::AEK_SIMD | + AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8", + 
AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_DOTPROD | AArch64::AEK_FP | + AArch64::AEK_FP16 | AArch64::AEK_LSE | + AArch64::AEK_PROFILE | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_RDM | + AArch64::AEK_SIMD | AArch64::AEK_SSBS, + "8.2-A"), + ARMCPUTestParams("neoverse-n2", "armv8.5-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_SVE | + AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_RDM | AArch64::AEK_MTE | + AArch64::AEK_SSBS | AArch64::AEK_SB | + AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_BF16 | AArch64::AEK_I8MM, + "8.5-A"), + ARMCPUTestParams("thunderx2t99", "armv8.1-a", "crypto-neon-fp-armv8", + AArch64::AEK_NONE | AArch64::AEK_CRC | + AArch64::AEK_CRYPTO | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_FP | + AArch64::AEK_SIMD, + "8.1-A"), + ARMCPUTestParams("thunderx3t110", "armv8.3-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_LSE | AArch64::AEK_RDM | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_PROFILE | AArch64::AEK_RAS | + AArch64::AEK_RAND | AArch64::AEK_RCPC, + "8.3-A"), + ARMCPUTestParams("thunderx", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_SIMD | AArch64::AEK_FP | + AArch64::AEK_PROFILE, + "8-A"), + ARMCPUTestParams("thunderxt81", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_SIMD | AArch64::AEK_FP | + AArch64::AEK_PROFILE, + "8-A"), + ARMCPUTestParams("thunderxt83", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_SIMD | AArch64::AEK_FP | + AArch64::AEK_PROFILE, + "8-A"), + ARMCPUTestParams("thunderxt88", "armv8-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_SIMD | AArch64::AEK_FP | + AArch64::AEK_PROFILE, + "8-A"), + ARMCPUTestParams("tsv110", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_PROFILE | + AArch64::AEK_FP16 | AArch64::AEK_FP16FML | + AArch64::AEK_DOTPROD, + "8.2-A"), + ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_SVE | + AArch64::AEK_RDM, + "8.2-A"), + ARMCPUTestParams("carmel", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | + AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_RDM, + "8.2-A")), ); + static constexpr unsigned NumAArch64CPUArchs = 45; TEST(TargetParserTest, testAArch64CPUArchList) { @@ -1350,4 +1386,5 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { AArch64::getArchExtFeature(ArchExt[i][1])); } } -} + +} // namespace From 0a8a2453fb843cf2e0f43e389b58d516525f0b8c Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 18 Dec 2020 21:26:25 +0100 Subject: [PATCH 073/378] [lldb/test] Add GdbRemoteTestCaseFactory to avoid duplication in lldb-server tests This uses the same approach as the debug info tests to avoid needing to explicitly spell out the two kinds of tests. I convert a handful of tests to the new mechanism. The rest will be converted in follow-up patches. 
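
For illustration, a test written against the new factory only needs a single
method; the GdbRemoteTestCaseFactory metaclass duplicates it per debug-server
category. This is a schematic sketch (the class and test names here are made
up, not one of the converted tests):

    import gdbremote_testcase
    from lldbsuite.test.decorators import *
    from lldbsuite.test.lldbtest import *

    class TestExample(gdbremote_testcase.GdbRemoteTestCaseBase):
        mydir = TestBase.compute_mydir(__file__)

        # Expanded by the metaclass into test_example_debugserver and
        # test_example_llgs, each tagged with its category, unless the
        # method already carries an explicit debug-server category.
        def test_example(self):
            self.build()
            self.prep_debug_monitor_and_inferior()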
--- .../Python/lldbsuite/test/decorators.py | 2 - .../tools/lldb-server/gdbremote_testcase.py | 33 ++++++ .../lldb-server/TestGdbRemoteExitCode.py | 39 ++----- .../tools/lldb-server/TestGdbRemoteKill.py | 18 +--- .../lldb-server/TestGdbRemoteModuleInfo.py | 11 +- .../lldb-server/TestGdbRemoteProcessInfo.py | 80 +++----------- .../lldb-server/TestGdbRemoteRegisterState.py | 23 +--- .../lldb-server/TestGdbRemoteSingleStep.py | 14 +-- .../TestGdbRemoteThreadsInStopReply.py | 100 +++++------------- 9 files changed, 96 insertions(+), 224 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index a17cd6ea33ab1..ff445fa0b926e 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -373,13 +373,11 @@ def should_skip_simulator_test(): def debugserver_test(func): """Decorate the item as a debugserver test.""" - func.debug_server = "debugserver" return add_test_categories(["debugserver"])(func) def llgs_test(func): """Decorate the item as a lldb-server test.""" - func.debug_server = "llgs" return add_test_categories(["llgs"])(func) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py index 0e3cde01520a0..d9289251d89db 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py @@ -27,6 +27,39 @@ class _ConnectionRefused(IOError): pass +class GdbRemoteTestCaseFactory(type): + + def __new__(cls, name, bases, attrs): + newattrs = {} + for attrname, attrvalue in attrs.items(): + if not attrname.startswith("test"): + newattrs[attrname] = attrvalue + continue + + # If any debug server categories were explicitly tagged, assume + # that list to be authoritative. If none were specified, try + # all of them. + all_categories = set(["debugserver", "llgs"]) + categories = set( + getattr(attrvalue, "categories", [])) & all_categories + if not categories: + categories = all_categories + + for cat in categories: + @decorators.add_test_categories([cat]) + @wraps(attrvalue) + def test_method(self, attrvalue=attrvalue): + return attrvalue(self) + + method_name = attrname + "_" + cat + test_method.__name__ = method_name + test_method.debug_server = cat + newattrs[method_name] = test_method + + return super(GdbRemoteTestCaseFactory, cls).__new__( + cls, name, bases, newattrs) + +@add_metaclass(GdbRemoteTestCaseFactory) class GdbRemoteTestCaseBase(Base): # Default time out in seconds. The timeout is increased tenfold under Asan. 
diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteExitCode.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteExitCode.py index 96ebbfb09bdc7..b42f8431c51ed 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteExitCode.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteExitCode.py @@ -12,46 +12,23 @@ class TestGdbRemoteExitCode(GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - def inferior_exit_0(self): - self.prep_debug_monitor_and_inferior() - self.test_sequence.add_log_lines( - ["read packet: $vCont;c#a8", - "send packet: $W00#00"], - True) - - self.expect_gdbremote_sequence() - - @debugserver_test - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def test_inferior_exit_0_debugserver(self): + def _test_inferior_exit(self, retval): self.build() - self.inferior_exit_0() - - @llgs_test - def test_inferior_exit_0_llgs(self): - self.build() - self.inferior_exit_0() - - def inferior_exit_42(self): - RETVAL = 42 procs = self.prep_debug_monitor_and_inferior( - inferior_args=["retval:%d" % RETVAL]) + inferior_args=["retval:%d" % retval]) self.test_sequence.add_log_lines( ["read packet: $vCont;c#a8", - "send packet: $W{0:02x}#00".format(RETVAL)], + "send packet: $W{0:02x}#00".format(retval)], True) self.expect_gdbremote_sequence() - @debugserver_test @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def test_inferior_exit_42_debugserver(self): - self.build() - self.inferior_exit_42() + def test_inferior_exit_0(self): + self._test_inferior_exit(0) - @llgs_test - def test_inferior_exit_42_llgs(self): - self.build() - self.inferior_exit_42() + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_inferior_exit_42(self): + self._test_inferior_exit(42) diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py index 175ecfed538b8..94dcf7b6e1712 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py @@ -10,9 +10,11 @@ class TestGdbRemoteKill(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def attach_commandline_kill_after_initial_stop(self): + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_attach_commandline_kill_after_initial_stop(self): + self.build() + self.set_inferior_startup_attach() reg_expr = r"^\$[XW][0-9a-fA-F]+([^#]*)#[0-9A-Fa-f]{2}" procs = self.prep_debug_monitor_and_inferior() self.test_sequence.add_log_lines([ @@ -43,15 +45,3 @@ def attach_commandline_kill_after_initial_stop(self): self.assertFalse( lldbgdbserverutils.process_is_running( procs["inferior"].pid, False)) - - @debugserver_test - def test_attach_commandline_kill_after_initial_stop_debugserver(self): - self.build() - self.set_inferior_startup_attach() - self.attach_commandline_kill_after_initial_stop() - - @llgs_test - def test_attach_commandline_kill_after_initial_stop_llgs(self): - self.build() - self.set_inferior_startup_attach() - self.attach_commandline_kill_after_initial_stop() diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py index 8365b657f9323..bab097c313650 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py @@ -12,7 +12,10 @@ class 
TestGdbRemoteModuleInfo(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - def module_info(self): + @add_test_categories(["llgs"]) + def test_module_info(self): + self.build() + self.set_inferior_startup_launch() procs = self.prep_debug_monitor_and_inferior() self.add_process_info_collection_packets() context = self.expect_gdbremote_sequence() @@ -34,9 +37,3 @@ def module_info(self): self.assertRegexpMatches(spec, '"file_size":\d+') self.assertRegexpMatches(spec, '"triple":"\w*-\w*-.*"') self.assertRegexpMatches(spec, '"uuid":"[A-Fa-f0-9]+"') - - @llgs_test - def test_module_info(self): - self.build() - self.set_inferior_startup_launch() - self.module_info() diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py index a4708679e0d80..5d8c5e0840c5e 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py @@ -1,6 +1,3 @@ - - - import gdbremote_testcase import lldbgdbserverutils from lldbsuite.test.decorators import * @@ -12,7 +9,9 @@ class TestGdbRemoteProcessInfo(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - def qProcessInfo_returns_running_process(self): + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_qProcessInfo_returns_running_process(self): + self.build() procs = self.prep_debug_monitor_and_inferior() self.add_process_info_collection_packets() @@ -33,18 +32,10 @@ def qProcessInfo_returns_running_process(self): # If possible, verify that the process is running. self.assertTrue(lldbgdbserverutils.process_is_running(pid, True)) - @debugserver_test @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def test_qProcessInfo_returns_running_process_debugserver(self): - self.build() - self.qProcessInfo_returns_running_process() - - @llgs_test - def test_qProcessInfo_returns_running_process_llgs(self): + def test_attach_commandline_qProcessInfo_reports_correct_pid(self): self.build() - self.qProcessInfo_returns_running_process() - - def attach_commandline_qProcessInfo_reports_correct_pid(self): + self.set_inferior_startup_attach() procs = self.prep_debug_monitor_and_inferior() self.assertIsNotNone(procs) self.add_process_info_collection_packets() @@ -63,21 +54,9 @@ def attach_commandline_qProcessInfo_reports_correct_pid(self): reported_pid = int(pid_text, base=16) self.assertEqual(reported_pid, procs["inferior"].pid) - @debugserver_test @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def test_attach_commandline_qProcessInfo_reports_correct_pid_debugserver( - self): + def test_qProcessInfo_reports_valid_endian(self): self.build() - self.set_inferior_startup_attach() - self.attach_commandline_qProcessInfo_reports_correct_pid() - - @llgs_test - def test_attach_commandline_qProcessInfo_reports_correct_pid_llgs(self): - self.build() - self.set_inferior_startup_attach() - self.attach_commandline_qProcessInfo_reports_correct_pid() - - def qProcessInfo_reports_valid_endian(self): procs = self.prep_debug_monitor_and_inferior() self.add_process_info_collection_packets() @@ -92,18 +71,7 @@ def qProcessInfo_reports_valid_endian(self): # Ensure the process id looks reasonable. 
endian = process_info.get("endian") self.assertIsNotNone(endian) - self.assertTrue(endian in ["little", "big", "pdp"]) - - @debugserver_test - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def test_qProcessInfo_reports_valid_endian_debugserver(self): - self.build() - self.qProcessInfo_reports_valid_endian() - - @llgs_test - def test_qProcessInfo_reports_valid_endian_llgs(self): - self.build() - self.qProcessInfo_reports_valid_endian() + self.assertIn(endian, ["little", "big", "pdp"]) def qProcessInfo_contains_keys(self, expected_key_set): procs = self.prep_debug_monitor_and_inferior() @@ -152,45 +120,27 @@ def qProcessInfo_does_not_contain_keys(self, absent_key_set): set(), "the listed keys were present but unexpected in qProcessInfo result") - @skipUnlessDarwin - @debugserver_test + @add_test_categories(["debugserver"]) @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def test_qProcessInfo_contains_cputype_cpusubtype_debugserver_darwin(self): - self.build() - self.qProcessInfo_contains_keys(set(['cputype', 'cpusubtype'])) - - @skipUnlessDarwin - @llgs_test - def test_qProcessInfo_contains_cputype_cpusubtype_llgs_darwin(self): + def test_qProcessInfo_contains_cputype_cpusubtype(self): self.build() self.qProcessInfo_contains_keys(set(['cputype', 'cpusubtype'])) - @llgs_test - def test_qProcessInfo_contains_triple_ppid_llgs(self): + @add_test_categories(["llgs"]) + def test_qProcessInfo_contains_triple_ppid(self): self.build() self.qProcessInfo_contains_keys(set(['triple', 'parent-pid'])) - @skipUnlessDarwin - @debugserver_test + @add_test_categories(["debugserver"]) @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - def test_qProcessInfo_does_not_contain_triple_debugserver_darwin(self): - self.build() - # We don't expect to see triple on darwin. If we do, we'll prefer triple - # to cputype/cpusubtype and skip some darwin-based ProcessGDBRemote ArchSpec setup - # for the remote Host and Process. - self.qProcessInfo_does_not_contain_keys(set(['triple'])) - - @skipUnlessDarwin - @llgs_test - def test_qProcessInfo_does_not_contain_triple_llgs_darwin(self): + def test_qProcessInfo_does_not_contain_triple(self): self.build() # We don't expect to see triple on darwin. If we do, we'll prefer triple # to cputype/cpusubtype and skip some darwin-based ProcessGDBRemote ArchSpec setup # for the remote Host and Process. self.qProcessInfo_does_not_contain_keys(set(['triple'])) - @skipIfDarwin - @llgs_test - def test_qProcessInfo_does_not_contain_cputype_cpusubtype_llgs(self): + @add_test_categories(["llgs"]) + def test_qProcessInfo_does_not_contain_cputype_cpusubtype(self): self.build() self.qProcessInfo_does_not_contain_keys(set(['cputype', 'cpusubtype'])) diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py index 3d07e19d2d382..849f5c96244db 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py @@ -9,7 +9,6 @@ class TestGdbRemoteRegisterState(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet def grp_register_save_restore_works(self, with_suffix): # Start up the process, use thread suffix, grab main thread id. 
inferior_args = ["message:main entered", "sleep:5"] @@ -92,29 +91,15 @@ def grp_register_save_restore_works(self, with_suffix): self.assertIsNotNone(final_reg_values) self.assertEqual(final_reg_values, initial_reg_values) - @debugserver_test - def test_grp_register_save_restore_works_with_suffix_debugserver(self): - USE_THREAD_SUFFIX = True - self.build() - self.set_inferior_startup_launch() - self.grp_register_save_restore_works(USE_THREAD_SUFFIX) - - @llgs_test - def test_grp_register_save_restore_works_with_suffix_llgs(self): + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_grp_register_save_restore_works_with_suffix(self): USE_THREAD_SUFFIX = True self.build() self.set_inferior_startup_launch() self.grp_register_save_restore_works(USE_THREAD_SUFFIX) - @debugserver_test - def test_grp_register_save_restore_works_no_suffix_debugserver(self): - USE_THREAD_SUFFIX = False - self.build() - self.set_inferior_startup_launch() - self.grp_register_save_restore_works(USE_THREAD_SUFFIX) - - @llgs_test - def test_grp_register_save_restore_works_no_suffix_llgs(self): + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_grp_register_save_restore_works_no_suffix(self): USE_THREAD_SUFFIX = False self.build() self.set_inferior_startup_launch() diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteSingleStep.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteSingleStep.py index fba8bec8ee6b8..09f729ca0daca 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteSingleStep.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteSingleStep.py @@ -1,5 +1,3 @@ - - import gdbremote_testcase from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -10,18 +8,10 @@ class TestGdbRemoteSingleStep(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - @debugserver_test - def test_single_step_only_steps_one_instruction_with_s_debugserver(self): - self.build() - self.set_inferior_startup_launch() - self.single_step_only_steps_one_instruction( - use_Hc_packet=True, step_instruction="s") - @skipIfWindows # No pty support to test any inferior std -i/e/o - @llgs_test @skipIf(triple='^mips') - def test_single_step_only_steps_one_instruction_with_s_llgs(self): + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_single_step_only_steps_one_instruction_with_s(self): self.build() self.set_inferior_startup_launch() self.single_step_only_steps_one_instruction( diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py index c83b4fbdd37dc..95e4236781473 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py @@ -1,4 +1,3 @@ - import json import re @@ -166,7 +165,11 @@ def gather_threads_info_pcs(self, pc_register, little_endian): return thread_pcs - def QListThreadsInStopReply_supported(self): + + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_QListThreadsInStopReply_supported(self): + self.build() + self.set_inferior_startup_launch() procs = self.prep_debug_monitor_and_inferior() self.test_sequence.add_log_lines( self.ENABLE_THREADS_IN_STOP_REPLY_ENTRIES, True) @@ -174,69 +177,42 @@ def QListThreadsInStopReply_supported(self): context = 
self.expect_gdbremote_sequence() self.assertIsNotNone(context) + # In current implementation of llgs on Windows, as a response to '\x03' packet, the debugger + # of the native process will trigger a call to DebugBreakProcess that will create a new thread + # to handle the exception debug event. So one more stop thread will be notified to the + # delegate, e.g. llgs. So tests below to assert the stop threads number will all fail. + @expectedFailureAll(oslist=["windows"]) + @skipIfNetBSD @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - @debugserver_test - def test_QListThreadsInStopReply_supported_debugserver(self): + def test_stop_reply_reports_multiple_threads(self): self.build() self.set_inferior_startup_launch() - self.QListThreadsInStopReply_supported() - - @llgs_test - def test_QListThreadsInStopReply_supported_llgs(self): - self.build() - self.set_inferior_startup_launch() - self.QListThreadsInStopReply_supported() - - def stop_reply_reports_multiple_threads(self, thread_count): # Gather threads from stop notification when QThreadsInStopReply is # enabled. stop_reply_threads = self.gather_stop_reply_threads( - self.ENABLE_THREADS_IN_STOP_REPLY_ENTRIES, thread_count) - self.assertEqual(len(stop_reply_threads), thread_count) + self.ENABLE_THREADS_IN_STOP_REPLY_ENTRIES, 5) + self.assertEqual(len(stop_reply_threads), 5) @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - @debugserver_test - def test_stop_reply_reports_multiple_threads_debugserver(self): - self.build() - self.set_inferior_startup_launch() - self.stop_reply_reports_multiple_threads(5) - - # In current implementation of llgs on Windows, as a response to '\x03' packet, the debugger - # of the native process will trigger a call to DebugBreakProcess that will create a new thread - # to handle the exception debug event. So one more stop thread will be notified to the - # delegate, e.g. llgs. So tests below to assert the stop threads number will all fail. @expectedFailureAll(oslist=["windows"]) @skipIfNetBSD - @llgs_test - def test_stop_reply_reports_multiple_threads_llgs(self): + def test_no_QListThreadsInStopReply_supplies_no_threads(self): self.build() self.set_inferior_startup_launch() - self.stop_reply_reports_multiple_threads(5) - - def no_QListThreadsInStopReply_supplies_no_threads(self, thread_count): # Gather threads from stop notification when QThreadsInStopReply is not # enabled. - stop_reply_threads = self.gather_stop_reply_threads(None, thread_count) + stop_reply_threads = self.gather_stop_reply_threads(None, 5) self.assertEqual(len(stop_reply_threads), 0) - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - @debugserver_test - def test_no_QListThreadsInStopReply_supplies_no_threads_debugserver(self): - self.build() - self.set_inferior_startup_launch() - self.no_QListThreadsInStopReply_supplies_no_threads(5) - @expectedFailureAll(oslist=["windows"]) @skipIfNetBSD - @llgs_test - def test_no_QListThreadsInStopReply_supplies_no_threads_llgs(self): + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_stop_reply_reports_correct_threads(self): self.build() self.set_inferior_startup_launch() - self.no_QListThreadsInStopReply_supplies_no_threads(5) - - def stop_reply_reports_correct_threads(self, thread_count): # Gather threads from stop notification when QThreadsInStopReply is # enabled. 
+ thread_count = 5 stop_reply_threads = self.gather_stop_reply_threads( self.ENABLE_THREADS_IN_STOP_REPLY_ENTRIES, thread_count) self.assertEqual(len(stop_reply_threads), thread_count) @@ -254,24 +230,15 @@ def stop_reply_reports_correct_threads(self, thread_count): # Ensure each thread in q{f,s}ThreadInfo appears in stop reply threads for tid in threads: - self.assertTrue(tid in stop_reply_threads) - - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - @debugserver_test - def test_stop_reply_reports_correct_threads_debugserver(self): - self.build() - self.set_inferior_startup_launch() - self.stop_reply_reports_correct_threads(5) + self.assertIn(tid, stop_reply_threads) @expectedFailureAll(oslist=["windows"]) @skipIfNetBSD - @llgs_test - def test_stop_reply_reports_correct_threads_llgs(self): + @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet + def test_stop_reply_contains_thread_pcs(self): self.build() self.set_inferior_startup_launch() - self.stop_reply_reports_correct_threads(5) - - def stop_reply_contains_thread_pcs(self, thread_count): + thread_count = 5 results = self.gather_stop_reply_pcs( self.ENABLE_THREADS_IN_STOP_REPLY_ENTRIES, thread_count) stop_reply_pcs = results["thread_pcs"] @@ -284,21 +251,6 @@ def stop_reply_contains_thread_pcs(self, thread_count): self.assertEqual(len(threads_info_pcs), thread_count) for thread_id in stop_reply_pcs: - self.assertTrue(thread_id in threads_info_pcs) - self.assertTrue(int(stop_reply_pcs[thread_id], 16) - == int(threads_info_pcs[thread_id], 16)) - - @expectedFailureAll(oslist=["windows"]) - @skipIfNetBSD - @llgs_test - def test_stop_reply_contains_thread_pcs_llgs(self): - self.build() - self.set_inferior_startup_launch() - self.stop_reply_contains_thread_pcs(5) - - @skipIfDarwinEmbedded # lldb-server tests not updated to work on ios etc yet - @debugserver_test - def test_stop_reply_contains_thread_pcs_debugserver(self): - self.build() - self.set_inferior_startup_launch() - self.stop_reply_contains_thread_pcs(5) + self.assertIn(thread_id, threads_info_pcs) + self.assertEqual(int(stop_reply_pcs[thread_id], 16), + int(threads_info_pcs[thread_id], 16)) From 8d75d902a955602feb7e2501e34f814ff5630415 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 24 Nov 2020 10:24:29 +0100 Subject: [PATCH 074/378] [DebugInfo] Don't use DW_OP_implicit_value for fragments Currently using DW_OP_implicit_value in fragments produces invalid DWARF expressions. (Such a case can occur in complex floats, for example.) This problem manifests itself as a missing DW_OP_piece operation after the last fragment. This happens because the function for printing constant float value skips printing the accompanying DWARF expression, as that would also print DW_OP_stack_value (which is not desirable in this case). However, this also results in DW_OP_piece being skipped. The reason that DW_OP_piece is missing only for the last piece is that the act of printing the next fragment corrects this. However, it does that for the wrong reason -- the code emitting this DW_OP_piece thinks that the previous fragment was missing, and so it thinks that it needs to skip over it in order to be able to print itself. In a simple scenario this works out, but it's likely that in a more complex setup (where some pieces are in fact missing), this logic would go badly wrong. In a simple setup gdb also seems to not mind the fact that the DW_OP_piece is missing, but it would also likely not handle more complex use cases. 
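
For concreteness, a source-level case that can hit this is a complex double
whose two halves end up as separate 64-bit constant fragments. This is an
assumed reproducer, not taken from the original report; the .ll test updated
below models the same situation with the constants 47.0 and 74.0 for a
variable "c":

    /* Build with: clang -g -O1 -c complex-frag.c */
    #include <complex.h>

    int main(void) {
      /* With optimization, "c" is typically described in debug info only as
         two constant 64-bit fragments (real and imaginary part). */
      double _Complex c = 47.0 + 74.0 * I;
      (void)c;
      return 0;
    }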
For this reason, this patch disables the usage of DW_OP_implicit_value in the frament scenario (we will use DW_OP_const*** instead), until we figure out the right way to deal with this. This guarantees that we produce valid expressions, and gdb can handle both kinds of inputs anyway. Differential Revision: https://reviews.llvm.org/D92013 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 ++++---- .../DebugInfo/X86/implicit_value-double.ll | 24 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 75b4a2831b0fa..6127c503404f7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2477,13 +2477,13 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.addExpression(std::move(ExprCursor)); return; } else if (Value.isConstantFP()) { - if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE()) { + if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() && + !ExprCursor) { DwarfExpr.addConstantFP(Value.getConstantFP()->getValueAPF(), AP); return; - } else if (Value.getConstantFP() - ->getValueAPF() - .bitcastToAPInt() - .getBitWidth() <= 64 /*bits*/) + } + if (Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() <= + 64 /*bits*/) DwarfExpr.addUnsignedConstant( Value.getConstantFP()->getValueAPF().bitcastToAPInt()); else diff --git a/llvm/test/DebugInfo/X86/implicit_value-double.ll b/llvm/test/DebugInfo/X86/implicit_value-double.ll index f205cb9a68eec..956c896b8d34b 100644 --- a/llvm/test/DebugInfo/X86/implicit_value-double.ll +++ b/llvm/test/DebugInfo/X86/implicit_value-double.ll @@ -1,8 +1,8 @@ ;; This test checks for emission of DW_OP_implicit_value operation ;; for double type. -; RUN: llc -debugger-tune=gdb -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s -; RUN: llc -debugger-tune=lldb -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s +; RUN: llc -O0 -debugger-tune=gdb -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s --check-prefixes=CHECK,BOTH +; RUN: llc -O0 -debugger-tune=lldb -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s --check-prefixes=CHECK,BOTH ; CHECK: .debug_info contents: ; CHECK: DW_TAG_variable @@ -10,7 +10,7 @@ ; CHECK-NEXT: [{{.*}}): DW_OP_implicit_value 0x8 0x1f 0x85 0xeb 0x51 0xb8 0x1e 0x09 0x40) ; CHECK-NEXT: DW_AT_name ("d") -; RUN: llc -debugger-tune=sce -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s -check-prefix=SCE-CHECK +; RUN: llc -O0 -debugger-tune=sce -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s -check-prefixes=SCE-CHECK,BOTH ; SCE-CHECK: .debug_info contents: ; SCE-CHECK: DW_TAG_variable @@ -18,13 +18,11 @@ ; SCE-CHECK-NEXT: [{{.*}}): DW_OP_constu 0x40091eb851eb851f, DW_OP_stack_value) ; SCE-CHECK-NEXT: DW_AT_name ("d") -;; Generated from: clang -ggdb -O1 -;;int main() { -;; double d = 3.14; -;; printf("dummy\n"); -;; d *= d; -;; return 0; -;;} +;; Using DW_OP_implicit_value for fragments is not currently supported. 
+; BOTH: DW_TAG_variable +; BOTH-NEXT: DW_AT_location ({{.*}} +; BOTH-NEXT: [{{.*}}): DW_OP_constu 0x4047800000000000, DW_OP_stack_value, DW_OP_piece 0x8, DW_OP_constu 0x4052800000000000, DW_OP_stack_value, DW_OP_piece 0x8) +; BOTH-NEXT: DW_AT_name ("c") ; ModuleID = 'implicit_value-double.c' source_filename = "implicit_value-double.c" @@ -37,6 +35,8 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 { entry: call void @llvm.dbg.value(metadata double 3.140000e+00, metadata !12, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata double 4.700000e+01, metadata !17, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg !14 + call void @llvm.dbg.value(metadata double 7.400000e+01, metadata !17, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg !14 %puts = call i32 @puts(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @str, i64 0, i64 0)), !dbg !15 call void @llvm.dbg.value(metadata double undef, metadata !12, metadata !DIExpression()), !dbg !14 ret i32 0, !dbg !16 @@ -67,9 +67,11 @@ attributes #2 = { nofree nounwind } !8 = !DISubroutineType(types: !9) !9 = !{!10} !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!11 = !{!12} +!11 = !{!12, !17} !12 = !DILocalVariable(name: "d", scope: !7, file: !1, line: 2, type: !13) !13 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) !14 = !DILocation(line: 0, scope: !7) !15 = !DILocation(line: 3, column: 2, scope: !7) !16 = !DILocation(line: 5, column: 2, scope: !7) +!17 = !DILocalVariable(name: "c", scope: !7, file: !1, line: 2, type: !18) +!18 = !DIBasicType(name: "complex", size: 128, encoding: DW_ATE_complex_float) From 9a6de74d5a9e11a7865ce4873ff3297b7efbb673 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 22 Dec 2020 09:13:51 +0000 Subject: [PATCH 075/378] [MachineLICM] Add llvm debug messages to SinkIntoLoop. NFC. I am investigating sinking instructions back into the loop under high register pressure. This is just a first NFC step to add some debug messages that allows tracing of the decision making. --- llvm/lib/CodeGen/MachineLICM.cpp | 55 ++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index bc7bb66a82fb6..7c356cf0e15b0 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -800,8 +800,13 @@ void MachineLICMBase::SinkIntoLoop() { I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. // As such, it must not have side-effects, e.g. such as a call has. 
- if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) + LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I); + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) { + LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n"); Candidates.push_back(&*I); + continue; + } + LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n"); } for (MachineInstr *I : Candidates) { @@ -811,8 +816,11 @@ void MachineLICMBase::SinkIntoLoop() { if (!MRI->hasOneDef(MO.getReg())) continue; bool CanSink = true; - MachineBasicBlock *B = nullptr; + MachineBasicBlock *SinkBlock = nullptr; + LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I); + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump()); // FIXME: Come up with a proper cost model that estimates whether sinking // the instruction (and thus possibly executing it on every loop // iteration) is more expensive than a register. @@ -821,24 +829,40 @@ void MachineLICMBase::SinkIntoLoop() { CanSink = false; break; } - if (!B) { - B = MI.getParent(); + if (!SinkBlock) { + SinkBlock = MI.getParent(); + LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: " + << printMBBReference(*SinkBlock) << "\n"); continue; } - B = DT->findNearestCommonDominator(B, MI.getParent()); - if (!B) { + SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n"); CanSink = false; break; } + LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " << + printMBBReference(*SinkBlock) << "\n"); + } + if (!CanSink) { + LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n"); + continue; } - if (!CanSink || !B || B == Preheader) + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n"); continue; + } + if (SinkBlock == Preheader) { + LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n"); + continue; + } - LLVM_DEBUG(dbgs() << "Sinking to " << printMBBReference(*B) << " from " - << printMBBReference(*I->getParent()) << ": " << *I); - B->splice(B->getFirstNonPHI(), Preheader, I); + LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock) + << " from " << printMBBReference(*I->getParent()) + << ": " << *I); + SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); - // The instruction is is moved from its basic block, so do not retain the + // The instruction is moved from its basic block, so do not retain the // debug information. assert(!I->isDebugInstr() && "Should not sink debug inst"); I->setDebugLoc(DebugLoc()); @@ -1028,6 +1052,7 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { bool DontMoveAcrossStore = true; if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) && !(HoistConstStores && isInvariantStore(I, TRI, MRI))) { + LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n"); return false; } @@ -1038,8 +1063,10 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. 
if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && - !IsGuaranteedToExecute(I.getParent())) + !IsGuaranteedToExecute(I.getParent())) { + LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n"); return false; + } // Convergent attribute has been used on operations that involve inter-thread // communication which results are implicitly affected by the enclosing @@ -1056,8 +1083,10 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { /// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { - if (!IsLICMCandidate(I)) + if (!IsLICMCandidate(I)) { + LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n"); return false; + } // The instruction is loop invariant if all of its operands are. for (const MachineOperand &MO : I.operands()) { From c0c0ae16c3312578cd15fd9913aac3ce528b7602 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 22 Dec 2020 09:20:47 +0000 Subject: [PATCH 076/378] [VPlan] Make VPInstruction a VPDef This patch turns updates VPInstruction to manage the value it defines using VPDef. The VPValue is used during VPlan construction and codegeneration instead of the plain IR reference where possible. Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D90565 --- .../Transforms/Vectorize/LoopVectorize.cpp | 16 ++++++------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 22 +----------------- llvm/lib/Transforms/Vectorize/VPlan.h | 23 ++++++------------- llvm/lib/Transforms/Vectorize/VPlanValue.h | 8 +++---- 4 files changed, 20 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 680106bcb7c6c..0b9e660c987a5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8433,11 +8433,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( if (auto Recipe = RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) { - // Check if the recipe can be converted to a VPValue. We need the extra - // down-casting step until VPRecipeBase inherits from VPValue. 
- VPValue *MaybeVPValue = Recipe->toVPValue(); - if (!Instr->getType()->isVoidTy() && MaybeVPValue) - Plan->addVPValue(Instr, MaybeVPValue); + for (auto *Def : Recipe->definedValues()) { + auto *UV = Def->getUnderlyingValue(); + Plan->addVPValue(UV, Def); + } RecipeBuilder.setRecipe(Instr, Recipe); VPBB->appendRecipe(Recipe); @@ -8613,10 +8612,11 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( : nullptr; VPReductionRecipe *RedRecipe = new VPReductionRecipe( &RdxDesc, R, ChainOp, VecOp, CondOp, Legal->hasFunNoNaNAttr(), TTI); - WidenRecipe->toVPValue()->replaceAllUsesWith(RedRecipe); + WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe); Plan->removeVPValueFor(R); Plan->addVPValue(R, RedRecipe); WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator()); + WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe); WidenRecipe->eraseFromParent(); if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || @@ -8625,7 +8625,7 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( RecipeBuilder.getRecipe(cast(R->getOperand(0))); assert(isa(CompareRecipe) && "Expected to replace a VPWidenSC"); - assert(CompareRecipe->toVPValue()->getNumUsers() == 0 && + assert(cast(CompareRecipe)->getNumUsers() == 0 && "Expected no remaining users"); CompareRecipe->eraseFromParent(); } @@ -8862,7 +8862,7 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; State.ILV->vectorizeMemoryInstruction(&Ingredient, State, - StoredValue ? nullptr : toVPValue(), + StoredValue ? nullptr : getVPValue(), getAddr(), StoredValue, getMask()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 601c406290b28..f5ce1a3ccafb9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -119,22 +119,6 @@ VPUser *VPRecipeBase::toVPUser() { return nullptr; } -VPValue *VPRecipeBase::toVPValue() { - if (getNumDefinedValues() == 1) - return getVPValue(); - if (auto *V = dyn_cast(this)) - return V; - return nullptr; -} - -const VPValue *VPRecipeBase::toVPValue() const { - if (getNumDefinedValues() == 1) - return getVPValue(); - if (auto *V = dyn_cast(this)) - return V; - return nullptr; -} - // Get the top-most entry block of \p Start. This is the entry block of the // containing VPlan. This function is templated to support both const and non-const blocks template static T *getPlanEntry(T *Start) { @@ -352,12 +336,8 @@ void VPBasicBlock::execute(VPTransformState *State) { void VPBasicBlock::dropAllReferences(VPValue *NewValue) { for (VPRecipeBase &R : Recipes) { - if (VPValue *Def = R.toVPValue()) + for (auto *Def : R.definedValues()) Def->replaceAllUsesWith(NewValue); - else if (auto *IR = dyn_cast(&R)) { - for (auto *Def : IR->definedValues()) - Def->replaceAllUsesWith(NewValue); - } if (auto *User = R.toVPUser()) for (unsigned I = 0, E = User->getNumOperands(); I != E; I++) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index ecb7004121a26..2b4c9574ae0ee 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -677,22 +677,13 @@ class VPRecipeBase : public ilist_node_with_parent, /// nullptr otherwise. VPUser *toVPUser(); - /// Returns a pointer to a VPValue, if the recipe inherits from VPValue or - /// nullptr otherwise. 
- VPValue *toVPValue(); - const VPValue *toVPValue() const; - /// Returns the underlying instruction, if the recipe is a VPValue or nullptr /// otherwise. Instruction *getUnderlyingInstr() { - if (auto *VPV = toVPValue()) - return cast_or_null(VPV->getUnderlyingValue()); - return nullptr; + return cast(getVPValue()->getUnderlyingValue()); } const Instruction *getUnderlyingInstr() const { - if (auto *VPV = toVPValue()) - return cast_or_null(VPV->getUnderlyingValue()); - return nullptr; + return cast(getVPValue()->getUnderlyingValue()); } /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -720,7 +711,7 @@ inline bool VPUser::classof(const VPDef *Def) { /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as /// the VPInstruction is also a single def-use vertex. -class VPInstruction : public VPValue, public VPUser, public VPRecipeBase { +class VPInstruction : public VPRecipeBase, public VPUser, public VPValue { friend class VPlanSlp; public: @@ -746,12 +737,12 @@ class VPInstruction : public VPValue, public VPUser, public VPRecipeBase { public: VPInstruction(unsigned Opcode, ArrayRef Operands) - : VPValue(VPValue::VPVInstructionSC), VPUser(Operands), - VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {} + : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser(Operands), + VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {} VPInstruction(unsigned Opcode, ArrayRef Operands) - : VPValue(VPValue::VPVInstructionSC), VPUser({}), - VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) { + : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser({}), + VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) { for (auto *I : Operands) addOperand(I->getVPValue()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index a1adccd5c75cb..1ebe1f8204a56 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -75,10 +75,6 @@ class VPValue { // for multiple underlying IRs (Polly?) by providing a new VPlan front-end, // back-end and analysis information for the new IR. - /// Return the underlying Value attached to this VPValue. - Value *getUnderlyingValue() { return UnderlyingVal; } - const Value *getUnderlyingValue() const { return UnderlyingVal; } - // Set \p Val as the underlying Value of this VPValue. void setUnderlyingValue(Value *Val) { assert(!UnderlyingVal && "Underlying Value is already set."); @@ -86,6 +82,10 @@ class VPValue { } public: + /// Return the underlying Value attached to this VPValue. + Value *getUnderlyingValue() { return UnderlyingVal; } + const Value *getUnderlyingValue() const { return UnderlyingVal; } + /// An enumeration for keeping track of the concrete subclass of VPValue that /// are actually instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPValue objects. They are used for concrete From 6fcb039956483988fa4b82a9a3944084353d00a5 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Tue, 22 Dec 2020 10:51:41 +0100 Subject: [PATCH 077/378] Fold comparison of __builtin_object_size expression with -1 for non-const size When __builtin_dynamic_object_size returns a non-constant expression, it cannot be -1 since that is an invalid return value for object size. 
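
As a sketch of the kind of check this is about (a simplified illustration, not
glibc's actual fortified wrappers), callers of the builtin typically compare
the result against (size_t)-1, the "unknown size" value:

    /* fortify-sketch.c -- hypothetical example of the comparison involved */
    #include <stddef.h>
    #include <string.h>

    void copy_checked(char *dst, const char *src, size_t n) {
      size_t sz = __builtin_dynamic_object_size(dst, 0);
      if (sz != (size_t)-1 && n > sz) /* size known and bound exceeded */
        __builtin_trap();
      memcpy(dst, src, n);
    }

With __builtin_dynamic_object_size, sz may be a runtime expression rather than
a compile-time constant, yet it still can never legitimately be (size_t)-1.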
However since passes running after the substitution don't know this, they are unable to optimize away the comparison and hence the comparison and branch stays in there. This change generates an appropriate call to llvm.assume to help the optimizer folding the test. glibc is considering adopting __builtin_dynamic_object_size for additional protection[1] and this change will help reduce branching overhead in fortified implementations of all of the functions that don't have the __builtin___*_chk type builtins, e.g. __ppoll_chk. Also remove the test limit-max-iterations.ll because it was deemed unnecessary during review. [1] https://sourceware.org/pipermail/libc-alpha/2020-November/120191.html Differential Revision: https://reviews.llvm.org/D93015 --- llvm/lib/Analysis/MemoryBuiltins.cpp | 12 +++- .../builtin-dynamic-object-size.ll | 57 ++++++++++++++++++- .../InstCombine/limit-max-iterations.ll | 39 ------------- 3 files changed, 66 insertions(+), 42 deletions(-) delete mode 100644 llvm/test/Transforms/InstCombine/limit-max-iterations.ll diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index cbb54e8efdc08..5d82d9dd6ea01 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -566,8 +566,16 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, Value *UseZero = Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second); ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType); - return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0), - ResultSize); + Value *Ret = Builder.CreateSelect( + UseZero, ConstantInt::get(ResultType, 0), ResultSize); + + // The non-constant size expression cannot evaluate to -1. + if (!isa(SizeOffsetPair.first) || + !isa(SizeOffsetPair.second)) + Builder.CreateAssumption( + Builder.CreateICmpNE(Ret, ConstantInt::get(ResultType, -1))); + + return Ret; } } diff --git a/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll index 4093a121060c4..91c9d3c2827ff 100644 --- a/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll +++ b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll @@ -14,7 +14,7 @@ entry: ; CHECK: define i64 @weird_identity_but_ok(i64 %sz) ; CHECK-NEXT: entry: -; CHECK-NEXT: ret i64 %sz +; CHECK: ret i64 %sz ; CHECK-NEXT: } define i64 @phis_are_neat(i1 %which) { @@ -101,6 +101,57 @@ for.end: ; preds = %for.body, %entry ; CHECK: define void @f() ; CHECK: call i64 @llvm.objectsize.i64.p0i8( +define void @bdos_cmpm1(i64 %alloc) { +entry: + %obj = call i8* @malloc(i64 %alloc) + %objsize = call i64 @llvm.objectsize.i64.p0i8(i8* %obj, i1 0, i1 0, i1 1) + %cmp.not = icmp eq i64 %objsize, -1 + br i1 %cmp.not, label %if.else, label %if.then + +if.then: + call void @fortified_chk(i8* %obj, i64 %objsize) + br label %if.end + +if.else: + call void @unfortified(i8* %obj, i64 %objsize) + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; CHECK: define void @bdos_cmpm1( +; CHECK: [[TMP:%.*]] = icmp ne i64 %alloc, -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP]]) +; CHECK-NEXT: br i1 false, label %if.else, label %if.then +; CHECK: call void @fortified_chk(i8* %obj, i64 %alloc) + +define void @bdos_cmpm1_expr(i64 %alloc, i64 %part) { +entry: + %sz = udiv i64 %alloc, %part + %obj = call i8* @malloc(i64 %sz) + %objsize = call i64 @llvm.objectsize.i64.p0i8(i8* %obj, i1 0, i1 0, i1 1) + %cmp.not = icmp eq i64 %objsize, -1 + br i1 %cmp.not, 
label %if.else, label %if.then + +if.then: + call void @fortified_chk(i8* %obj, i64 %objsize) + br label %if.end + +if.else: + call void @unfortified(i8* %obj, i64 %objsize) + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; CHECK: define void @bdos_cmpm1_expr( +; CHECK: [[TMP:%.*]] = icmp ne i64 [[SZ:%.*]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP]]) +; CHECK-NEXT: br i1 false, label %if.else, label %if.then +; CHECK: call void @fortified_chk(i8* %obj, i64 [[SZ]]) + declare void @bury(i32) local_unnamed_addr #2 ; Function Attrs: nounwind allocsize(0) @@ -113,3 +164,7 @@ declare void @free(i8* nocapture) ; Function Attrs: nounwind readnone speculatable declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) + +declare void @fortified_chk(i8*, i64) + +declare void @unfortified(i8*, i64) diff --git a/llvm/test/Transforms/InstCombine/limit-max-iterations.ll b/llvm/test/Transforms/InstCombine/limit-max-iterations.ll deleted file mode 100644 index a29166c042564..0000000000000 --- a/llvm/test/Transforms/InstCombine/limit-max-iterations.ll +++ /dev/null @@ -1,39 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine --instcombine-max-iterations=0 -S | FileCheck %s --check-prefix=ZERO -; RUN: opt < %s -instcombine --instcombine-max-iterations=1 -S | FileCheck %s --check-prefix=ONE -; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefix=FIXPOINT -; RUN: not --crash opt < %s -instcombine -S --instcombine-infinite-loop-threshold=2 2>&1 | FileCheck %s --check-prefix=LOOP - -; Based on builtin-dynamic-object-size.ll. This requires multiple iterations of -; InstCombine to reach a fixpoint. - -define i64 @weird_identity_but_ok(i64 %sz) { -; ZERO-LABEL: @weird_identity_but_ok( -; ZERO-NEXT: entry: -; ZERO-NEXT: [[CALL:%.*]] = tail call i8* @malloc(i64 [[SZ:%.*]]) -; ZERO-NEXT: [[CALC_SIZE:%.*]] = tail call i64 @llvm.objectsize.i64.p0i8(i8* [[CALL]], i1 false, i1 true, i1 true) -; ZERO-NEXT: tail call void @free(i8* [[CALL]]) -; ZERO-NEXT: ret i64 [[CALC_SIZE]] -; -; ONE-LABEL: @weird_identity_but_ok( -; ONE-NEXT: entry: -; ONE-NEXT: [[TMP0:%.*]] = sub i64 [[SZ:%.*]], 0 -; ONE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[SZ]], 0 -; ONE-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP0]] -; ONE-NEXT: ret i64 [[TMP2]] -; -; FIXPOINT-LABEL: @weird_identity_but_ok( -; FIXPOINT-NEXT: entry: -; FIXPOINT-NEXT: ret i64 [[SZ:%.*]] -; -; LOOP: LLVM ERROR: Instruction Combining seems stuck in an infinite loop after 2 iterations. 
-entry: - %call = tail call i8* @malloc(i64 %sz) - %calc_size = tail call i64 @llvm.objectsize.i64.p0i8(i8* %call, i1 false, i1 true, i1 true) - tail call void @free(i8* %call) - ret i64 %calc_size -} - -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) -declare i8* @malloc(i64) -declare void @free(i8*) From 4d59c8fdb955ea0d668b854f467e12bce05a8857 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 30 Nov 2020 18:29:55 +0100 Subject: [PATCH 078/378] -fstack-clash-protection: Return an actual error when used on unsupported OS $ clang-12: error: -fstack-clash-protection is not supported on Windows or Mac OS X Differential Revision: https://reviews.llvm.org/D92245 --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 2 ++ clang/lib/Driver/ToolChains/Clang.cpp | 11 +++++++---- llvm/test/CodeGen/X86/stack-clash-large.ll | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index e92a4bf1dac56..736950b0abb1e 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -273,6 +273,8 @@ def err_drv_unsupported_embed_bitcode : Error<"%0 is not supported with -fembed-bitcode">; def err_drv_bitcode_unsupported_on_toolchain : Error< "-fembed-bitcode is not supported on versions of iOS prior to 6.0">; +def err_drv_stack_clash_protection_unsupported_on_toolchain : Error< + "-fstack-clash-protection is not supported on %0">; def err_drv_invalid_malign_branch_EQ : Error< "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 300ab6e815e23..c04b350dae7d7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3067,12 +3067,15 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, } } -static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, - ArgStringList &CmdArgs) { +static void RenderSCPOptions(const Driver &D, const ToolChain &TC, + const ArgList &Args, ArgStringList &CmdArgs) { const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple(); - if (!EffectiveTriple.isOSLinux()) + if (EffectiveTriple.isOSWindows()) { + D.Diag(diag::err_drv_stack_clash_protection_unsupported_on_toolchain) + << EffectiveTriple.getOSName(); return; + } if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() && !EffectiveTriple.isPPC64()) @@ -5550,7 +5553,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening")); RenderSSPOptions(D, TC, Args, CmdArgs, KernelOrKext); - RenderSCPOptions(TC, Args, CmdArgs); + RenderSCPOptions(D, TC, Args, CmdArgs); RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs); // Translate -mstackrealign diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll index dd53cd8f69646..7deae310f617b 100644 --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -1,5 +1,7 @@ ; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s ; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s +; RUN: llc -mtriple=x86_64-unknown-freebsd < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s | FileCheck -check-prefix=CHECK-X86-64 %s define i32 @foo() local_unnamed_addr 
#0 { From 781a816d4cacbd0e73d36b12f82c87c0393b5a5b Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 17 Dec 2020 10:52:37 +0000 Subject: [PATCH 079/378] [llvm][Arm/AArch64] Format extension flags in CPU test failures Previously you just two hex numbers you had to decode manually. This change adds a predicate formatter for extension flags to produce failure messages like: ``` [ RUN ] AArch64CPUTests/AArch64CPUTestFixture.testAArch64CPU/2 <...>llvm/unittests/Support/TargetParserTest.cpp:862: Failure Expected extension flags: +fp-armv8, +crc, +crypto (0xe) Got extension flags: +fp-armv8, +neon, +crc, +crypto (0x1e) [ FAILED ] AArch64CPUTests/AArch64CPUTestFixture.testAArch64CPU/2, where GetParam() = "cortex-a34", "armv8-a", <...> ``` From there you can take the feature name and map it back to the enum in ARM/AArch64TargetParser.def. (which isn't perfect but you've probably got both files open if you're editing these tests) Note that AEK_NONE is not meant to be user facing in the compiler but here it is part of the tests. So failures may show an extension "none" where the normal target parser wouldn't. The formatter is implemented as a template on ARM::ISAKind because the predicate formatters assume all parameters are used for comparison. (e.g. PRED_FORMAT3 is for comparing 3 values, not having 3 arguments in general) Reviewed By: MarkMurrayARM Differential Revision: https://reviews.llvm.org/D93448 --- llvm/unittests/Support/TargetParserTest.cpp | 49 ++++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 5208f6a75b022..bc2fd6243aa5f 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -8,7 +8,9 @@ #include "llvm/Support/TargetParser.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/FormatVariadic.h" #include "gtest/gtest.h" #include @@ -31,6 +33,47 @@ const char *ARMArch[] = { "armv8m.main", "iwmmxt", "iwmmxt2", "xscale", "armv8.1-m.main", }; +template +std::string FormatExtensionFlags(uint64_t Flags) { + std::vector Features; + + if (ISAKind == ARM::ISAKind::AARCH64) { + // AEK_NONE is not meant to be shown to the user so the target parser + // does not recognise it. It is relevant here though. + if (Flags & AArch64::AEK_NONE) + Features.push_back("none"); + AArch64::getExtensionFeatures(Flags, Features); + } else { + if (Flags & ARM::AEK_NONE) + Features.push_back("none"); + ARM::getExtensionFeatures(Flags, Features); + } + + // The target parser also includes every extension you don't have. + // E.g. if AEK_CRC is not set then it adds "-crc". Not useful here. 
+ Features.erase(std::remove_if(Features.begin(), Features.end(), + [](StringRef extension) { + return extension.startswith("-"); + }), + Features.end()); + + return llvm::join(Features, ", "); +} + +template +testing::AssertionResult +AssertSameExtensionFlags(const char *m_expr, const char *n_expr, + uint64_t ExpectedFlags, uint64_t GotFlags) { + if (ExpectedFlags == GotFlags) + return testing::AssertionSuccess(); + + return testing::AssertionFailure() << llvm::formatv( + "Expected extension flags: {0} ({1:x})\n" + " Got extension flags: {2} ({3:x})\n", + FormatExtensionFlags(ExpectedFlags), ExpectedFlags, + FormatExtensionFlags(GotFlags), GotFlags); +} + struct ARMCPUTestParams { ARMCPUTestParams(StringRef CPUName, StringRef ExpectedArch, StringRef ExpectedFPU, uint64_t ExpectedFlags, @@ -65,7 +108,8 @@ TEST_P(ARMCPUTestFixture, ARMCPUTests) { EXPECT_EQ(params.ExpectedFPU, ARM::getFPUName(FPUKind)); uint64_t default_extensions = ARM::getDefaultExtensions(params.CPUName, AK); - EXPECT_EQ(params.ExpectedFlags, default_extensions); + EXPECT_PRED_FORMAT2(AssertSameExtensionFlags, + params.ExpectedFlags, default_extensions); EXPECT_EQ(params.CPUAttr, ARM::getCPUAttr(AK)); } @@ -816,7 +860,8 @@ TEST_P(AArch64CPUTestFixture, testAArch64CPU) { uint64_t default_extensions = AArch64::getDefaultExtensions(params.CPUName, AK); - EXPECT_EQ(params.ExpectedFlags, default_extensions); + EXPECT_PRED_FORMAT2(AssertSameExtensionFlags, + params.ExpectedFlags, default_extensions); unsigned FPUKind = AArch64::getDefaultFPU(params.CPUName, AK); EXPECT_EQ(params.ExpectedFPU, ARM::getFPUName(FPUKind)); From 4b3633cf2cb67220763494427f6db250bbd87494 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 22 Dec 2020 11:30:55 +0000 Subject: [PATCH 080/378] [clangd] Reuse buffer for JSONTransport::sendMessage Allocate a Buffer in the JSONTransport to be used when sending messages to the client. This gets reused each time a message is sent, reducing in fewer malloc, which is always a bonus. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D93531 --- clang-tools-extra/clangd/JSONTransport.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clangd/JSONTransport.cpp b/clang-tools-extra/clangd/JSONTransport.cpp index eb5a83882b2bd..662e5df4e27bf 100644 --- a/clang-tools-extra/clangd/JSONTransport.cpp +++ b/clang-tools-extra/clangd/JSONTransport.cpp @@ -126,13 +126,13 @@ class JSONTransport : public Transport { bool handleMessage(llvm::json::Value Message, MessageHandler &Handler); // Writes outgoing message to Out stream. void sendMessage(llvm::json::Value Message) { - std::string S; - llvm::raw_string_ostream OS(S); + OutputBuffer.clear(); + llvm::raw_svector_ostream OS(OutputBuffer); OS << llvm::formatv(Pretty ? "{0:2}" : "{0}", Message); - OS.flush(); - Out << "Content-Length: " << S.size() << "\r\n\r\n" << S; + Out << "Content-Length: " << OutputBuffer.size() << "\r\n\r\n" + << OutputBuffer; Out.flush(); - vlog(">>> {0}\n", S); + vlog(">>> {0}\n", OutputBuffer); } // Read raw string messages from input stream. 
@@ -143,6 +143,7 @@ class JSONTransport : public Transport { llvm::Optional readDelimitedMessage(); llvm::Optional readStandardMessage(); + llvm::SmallVector OutputBuffer; std::FILE *In; llvm::raw_ostream &Out; llvm::raw_ostream &InMirror; From 853770f24130cabc763995bd72419a29670ee3ca Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 22 Dec 2020 06:35:40 -0500 Subject: [PATCH 081/378] [gn build] (manually) port b8c37153d5393 --- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index 908dce6a12e5b..c07a61fe61e43 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -11,13 +11,15 @@ config("features_config") { write_cmake_config("features") { input = "Features.inc.in" output = "$target_gen_dir/Features.inc" - values = [] + values = [ + "CLANGD_ENABLE_REMOTE=0", + "CLANGD_MALLOC_TRIM=1", + ] if (clangd_build_xpc) { values += [ "CLANGD_BUILD_XPC=1" ] } else { values += [ "CLANGD_BUILD_XPC=0" ] } - values += [ "CLANGD_ENABLE_REMOTE=0" ] public_configs = [ ":features_config" ] } From ba1202a1e4f75d8f234d01730ac65a913e9baa01 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 22 Dec 2020 05:43:33 -0600 Subject: [PATCH 082/378] [PowerPC] Restore stack ptr from base ptr when available On subtargets that have a red zone, we will copy the stack pointer to the base pointer in the prologue prior to updating the stack pointer. There are no other updates to the base pointer after that. This suggests that we should be able to restore the stack pointer from the base pointer rather than loading it from the back chain or adding the frame size back to either the stack pointer or the frame pointer. This came about because functions that call setjmp need to restore the SP from the FP because the back chain might have been clobbered (see https://reviews.llvm.org/D92906). However, if the stack is realigned, the restored SP might be incorrect (which is what caused the failures in the two ASan test cases). This patch was tested quite extensivelly both with sanitizer runtimes and general code. Differential revision: https://reviews.llvm.org/D93327 --- .../TestCases/Posix/unpoison-alternate-stack.cpp | 1 - compiler-rt/test/asan/TestCases/longjmp.cpp | 1 - llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 9 ++++++++- llvm/test/CodeGen/PowerPC/aix-base-pointer.ll | 4 ++-- llvm/test/CodeGen/PowerPC/pr46759.ll | 2 +- .../test/CodeGen/PowerPC/stack-clash-prologue.ll | 16 ++++++++-------- llvm/test/CodeGen/PowerPC/stack-realign.ll | 4 ++-- 7 files changed, 21 insertions(+), 16 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp index 9da47facac276..4774993cdf328 100644 --- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: powerpc64 // Tests that __asan_handle_no_return properly unpoisons the signal alternate // stack. 
diff --git a/compiler-rt/test/asan/TestCases/longjmp.cpp b/compiler-rt/test/asan/TestCases/longjmp.cpp index bc4165ffd8139..8e9f2ae195c71 100644 --- a/compiler-rt/test/asan/TestCases/longjmp.cpp +++ b/compiler-rt/test/asan/TestCases/longjmp.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: powerpc64 // RUN: %clangxx_asan -O %s -o %t && %run %t #include diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index b93322c155346..50ce11b8374fa 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1644,11 +1644,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red // zone add this offset back now. + // If the function has a base pointer, the stack pointer has been copied + // to it so we can restore it by copying in the other direction. + if (HasRedZone && HasBP) { + BuildMI(MBB, MBBI, dl, OrInst, RBReg). + addReg(BPReg). + addReg(BPReg); + } // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the // value of R31 in this case. Similar situation exists with setjmp. - if (FI->hasFastCall() || MF.exposesReturnsTwice()) { + else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { assert(HasFP && "Expecting a valid frame pointer."); if (!HasRedZone) RBReg = FPReg; diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll index 2b1cc0c45db4c..c6e1107d47387 100644 --- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll +++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll @@ -27,7 +27,7 @@ declare void @callee(i32*) ; 32BIT: stwux 1, 1, 0 ; 32BIT: addi 3, 1, 64 ; 32BIT: bl .callee -; 32BIT: lwz 1, 0(1) +; 32BIT: mr 1, 30 ; 32BIT: lwz 30, -16(1) ; 64BIT-LABEL: .caller: @@ -38,5 +38,5 @@ declare void @callee(i32*) ; 64BIT: stdux 1, 1, 0 ; 64BIT: addi 3, 1, 128 ; 64BIT: bl .callee -; 64BIT: ld 1, 0(1) +; 64BIT: mr 1, 30 ; 64BIT: ld 30, -24(1) diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll index 716e050cdbeef..33b44b720b6e1 100644 --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -61,7 +61,7 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-NEXT: .LBB0_6: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 ; CHECK-LE-NEXT: lbz r3, 0(r3) -; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: mr r1, r30 ; CHECK-LE-NEXT: ld r31, -8(r1) ; CHECK-LE-NEXT: ld r30, -16(r1) ; CHECK-LE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll index e4d5fa216b453..6443059c97046 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -544,7 +544,7 @@ define i32 @f8(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-NEXT: li r5, 1 ; CHECK-LE-NEXT: stwx r5, r4, r3 ; CHECK-LE-NEXT: lwz r3, 64(r1) -; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: mr r1, r30 ; CHECK-LE-NEXT: ld r30, -16(r1) ; CHECK-LE-NEXT: blr ; @@ -562,7 +562,7 @@ define i32 @f8(i64 %i) local_unnamed_addr #0 { ; CHECK-BE-NEXT: sldi r3, r3, 2 ; CHECK-BE-NEXT: stwx r5, r4, r3 ; CHECK-BE-NEXT: lwz r3, 64(r1) -; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: mr r1, r30 ; CHECK-BE-NEXT: ld r30, -16(r1) ; CHECK-BE-NEXT: blr ; @@ -631,7 +631,7 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { 
; CHECK-LE-NEXT: li r5, 1 ; CHECK-LE-NEXT: stwx r5, r4, r3 ; CHECK-LE-NEXT: lwz r3, 2048(r1) -; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: mr r1, r30 ; CHECK-LE-NEXT: ld r30, -16(r1) ; CHECK-LE-NEXT: blr ; @@ -669,7 +669,7 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { ; CHECK-BE-NEXT: sldi r3, r3, 2 ; CHECK-BE-NEXT: stwx r5, r4, r3 ; CHECK-BE-NEXT: lwz r3, 2048(r1) -; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: mr r1, r30 ; CHECK-BE-NEXT: ld r30, -16(r1) ; CHECK-BE-NEXT: blr ; @@ -743,7 +743,7 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-NEXT: li r5, 1 ; CHECK-LE-NEXT: stwx r5, r4, r3 ; CHECK-LE-NEXT: lwz r3, 1024(r1) -; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: mr r1, r30 ; CHECK-LE-NEXT: ld r30, -16(r1) ; CHECK-LE-NEXT: blr ; @@ -780,7 +780,7 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { ; CHECK-BE-NEXT: sldi r3, r3, 2 ; CHECK-BE-NEXT: stwx r5, r4, r3 ; CHECK-BE-NEXT: lwz r3, 1024(r1) -; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: mr r1, r30 ; CHECK-BE-NEXT: ld r30, -16(r1) ; CHECK-BE-NEXT: blr ; @@ -884,7 +884,7 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-NEXT: .LBB11_8: ; CHECK-LE-NEXT: addi r3, r1, -32768 ; CHECK-LE-NEXT: lbz r3, 0(r3) -; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: mr r1, r30 ; CHECK-LE-NEXT: ld r31, -8(r1) ; CHECK-LE-NEXT: ld r30, -16(r1) ; CHECK-LE-NEXT: blr @@ -954,7 +954,7 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-BE-NEXT: .LBB11_8: ; CHECK-BE-NEXT: addi r3, r1, -32768 ; CHECK-BE-NEXT: lbz r3, 0(r3) -; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: mr r1, r30 ; CHECK-BE-NEXT: ld r31, -8(r1) ; CHECK-BE-NEXT: ld r30, -16(r1) ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/stack-realign.ll b/llvm/test/CodeGen/PowerPC/stack-realign.ll index abb608797f072..8638592ab8b8e 100644 --- a/llvm/test/CodeGen/PowerPC/stack-realign.ll +++ b/llvm/test/CodeGen/PowerPC/stack-realign.ll @@ -43,7 +43,7 @@ entry: ; CHECK: std 3, 48(30) -; CHECK: ld 1, 0(1) +; CHECK: mr 1, 30 ; CHECK-DAG: ld [[SR:[0-9]+]], 16(1) ; CHECK-DAG: ld 30, -16(1) ; CHECK-DAG: mtlr [[SR]] @@ -69,7 +69,7 @@ entry: ; CHECK-FP: std 3, 48(30) -; CHECK-FP: ld 1, 0(1) +; CHECK-FP: mr 1, 30 ; CHECK-FP-DAG: ld [[SR:[0-9]+]], 16(1) ; CHECK-FP-DAG: ld 31, -8(1) ; CHECK-FP-DAG: ld 30, -16(1) From 00065d5cbd02b0f3fccb34881b58bcd0852b3970 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 22 Dec 2020 06:51:19 -0500 Subject: [PATCH 083/378] Revert "-fstack-clash-protection: Return an actual error when used on unsupported OS" This reverts commit 4d59c8fdb955ea0d668b854f467e12bce05a8857. Breaks tens of thousands of tests, and had pending review comments, see comments on https://reviews.llvm.org/D92245 (and e.g. http://lab.llvm.org:8011/#/builders/109/builds/5236 for failures). 
--- clang/include/clang/Basic/DiagnosticDriverKinds.td | 2 -- clang/lib/Driver/ToolChains/Clang.cpp | 11 ++++------- llvm/test/CodeGen/X86/stack-clash-large.ll | 2 -- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 736950b0abb1e..e92a4bf1dac56 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -273,8 +273,6 @@ def err_drv_unsupported_embed_bitcode : Error<"%0 is not supported with -fembed-bitcode">; def err_drv_bitcode_unsupported_on_toolchain : Error< "-fembed-bitcode is not supported on versions of iOS prior to 6.0">; -def err_drv_stack_clash_protection_unsupported_on_toolchain : Error< - "-fstack-clash-protection is not supported on %0">; def err_drv_invalid_malign_branch_EQ : Error< "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c04b350dae7d7..300ab6e815e23 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3067,15 +3067,12 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, } } -static void RenderSCPOptions(const Driver &D, const ToolChain &TC, - const ArgList &Args, ArgStringList &CmdArgs) { +static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs) { const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple(); - if (EffectiveTriple.isOSWindows()) { - D.Diag(diag::err_drv_stack_clash_protection_unsupported_on_toolchain) - << EffectiveTriple.getOSName(); + if (!EffectiveTriple.isOSLinux()) return; - } if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() && !EffectiveTriple.isPPC64()) @@ -5553,7 +5550,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening")); RenderSSPOptions(D, TC, Args, CmdArgs, KernelOrKext); - RenderSCPOptions(D, TC, Args, CmdArgs); + RenderSCPOptions(TC, Args, CmdArgs); RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs); // Translate -mstackrealign diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll index 7deae310f617b..dd53cd8f69646 100644 --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -1,7 +1,5 @@ ; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s ; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s -; RUN: llc -mtriple=x86_64-unknown-freebsd < %s | FileCheck -check-prefix=CHECK-X86-64 %s -; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s | FileCheck -check-prefix=CHECK-X86-64 %s define i32 @foo() local_unnamed_addr #0 { From 0f81598cc1f46b9919b3500d7c4743dd7a5eac1a Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Tue, 22 Dec 2020 11:42:07 +0000 Subject: [PATCH 084/378] [libc++] Add a 'is-lockfree-runtime-function' lit feature On macOS 10.14 /usr/lib/system/libcompiler_rt.dylib contains all the `__atomic_load*`, etc. functions but does not include the `__atomic_is_lock_free` function. The lack of this function causes the non-lockfree-atomics feature to be set to false even though large atomic operations are actually supported, it's just the is_lock_free() function that is missing. 
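As a minimal illustration of the distinction described above (this mirrors the two
configuration probes used by the lit features in the diff below; the only assumption
is a plain <atomic> user with an over-sized, non-lock-free type):

```
#include <atomic>
struct Large { int storage[100]; };
std::atomic<Large> x;

int main() {
  (void)x.load();           // lowers to __atomic_load/__atomic_store: present
  return x.is_lock_free();  // lowers to __atomic_is_lock_free: missing on 10.14
}
```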
This is required so that the !non-lockfree-atomics feature can be used to XFAIL tests that require runtime library support (D88818). Reviewed By: #libc, ldionne Differential Revision: https://reviews.llvm.org/D91911 --- .../libcxx/atomics/atomics.align/align.pass.pass.cpp | 2 +- libcxx/utils/libcxx/test/features.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/libcxx/test/libcxx/atomics/atomics.align/align.pass.pass.cpp b/libcxx/test/libcxx/atomics/atomics.align/align.pass.pass.cpp index ebe8fc82775c7..96a34e22069c8 100644 --- a/libcxx/test/libcxx/atomics/atomics.align/align.pass.pass.cpp +++ b/libcxx/test/libcxx/atomics/atomics.align/align.pass.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: libcpp-has-no-threads, c++03 -// REQUIRES: non-lockfree-atomics +// REQUIRES: is-lockfree-runtime-function // GCC currently fails because it needs -fabi-version=6 to fix mangling of // std::atomic when used with __attribute__((vector(X))). diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index 3023caeea5d3e..a435a938fdcdc 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -55,7 +55,16 @@ #include struct Large { int storage[100]; }; std::atomic x; - int main(int, char**) { return x.load(), x.is_lock_free(); } + int main(int, char**) { (void)x.load(); return 0; } + """)), + # TODO: Remove this feature once compiler-rt includes __atomic_is_lockfree() + # on all supported platforms. + Feature(name='is-lockfree-runtime-function', + when=lambda cfg: sourceBuilds(cfg, """ + #include + struct Large { int storage[100]; }; + std::atomic x; + int main(int, char**) { return x.is_lock_free(); } """)), Feature(name='apple-clang', when=_isAppleClang), From 6277bd75dc71ab6cf856ae9a778cab2c90840ca6 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Tue, 22 Dec 2020 11:43:48 +0000 Subject: [PATCH 085/378] [compiler-rt] Fix atomic_test.c on macOS The macOS name mangling adds another underscore. Therefore, on macOS the __atomic_* functions are actually ___atomic_* in libcompiler_rt.dylib. To handle this case, prepend the asm() argument with __USER_LABEL_PREFIX__ in the same way that atomic.c does. Reviewed By: ldionne Differential Revision: https://reviews.llvm.org/D92833 --- compiler-rt/test/builtins/Unit/atomic_test.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/compiler-rt/test/builtins/Unit/atomic_test.c b/compiler-rt/test/builtins/Unit/atomic_test.c index 955c08769e089..7c00841e2c0ba 100644 --- a/compiler-rt/test/builtins/Unit/atomic_test.c +++ b/compiler-rt/test/builtins/Unit/atomic_test.c @@ -24,9 +24,13 @@ // should avoid confounding factors, ensuring that we actually test the // functions themselves, regardless of how the builtins are lowered. We need to // use asm labels because we can't redeclare the builtins. +// Note: we need to prepend an underscore to this name for e.g. macOS. 
+#define _STRINGIFY(x) #x +#define STRINGIFY(x) _STRINGIFY(x) +#define EXTERNAL_NAME(name) asm(STRINGIFY(__USER_LABEL_PREFIX__) #name) -void __atomic_load_c(int size, const void *src, void *dest, - int model) asm("__atomic_load"); +void __atomic_load_c(int size, void *src, void *dest, + int model) EXTERNAL_NAME(__atomic_load); uint8_t __atomic_load_1(uint8_t *src, int model); uint16_t __atomic_load_2(uint16_t *src, int model); @@ -34,7 +38,7 @@ uint32_t __atomic_load_4(uint32_t *src, int model); uint64_t __atomic_load_8(uint64_t *src, int model); void __atomic_store_c(int size, void *dest, const void *src, - int model) asm("__atomic_store"); + int model) EXTERNAL_NAME(__atomic_store); void __atomic_store_1(uint8_t *dest, uint8_t val, int model); void __atomic_store_2(uint16_t *dest, uint16_t val, int model); @@ -42,7 +46,7 @@ void __atomic_store_4(uint32_t *dest, uint32_t val, int model); void __atomic_store_8(uint64_t *dest, uint64_t val, int model); void __atomic_exchange_c(int size, void *ptr, const void *val, void *old, - int model) asm("__atomic_exchange"); + int model) EXTERNAL_NAME(__atomic_exchange); uint8_t __atomic_exchange_1(uint8_t *dest, uint8_t val, int model); uint16_t __atomic_exchange_2(uint16_t *dest, uint16_t val, int model); @@ -51,7 +55,7 @@ uint64_t __atomic_exchange_8(uint64_t *dest, uint64_t val, int model); int __atomic_compare_exchange_c(int size, void *ptr, void *expected, const void *desired, int success, int failure) - asm("__atomic_compare_exchange"); + EXTERNAL_NAME(__atomic_compare_exchange); bool __atomic_compare_exchange_1(uint8_t *ptr, uint8_t *expected, uint8_t desired, int success, int failure); From 3b879fc97305849026db0e856920d318fadbc04b Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Sun, 20 Dec 2020 01:32:50 +0000 Subject: [PATCH 086/378] [ASTMatchers] Traverse-ignore range-for implementation details Differential Revision: https://reviews.llvm.org/D93596 --- clang/include/clang/AST/RecursiveASTVisitor.h | 3 ++- clang/lib/ASTMatchers/ASTMatchFinder.cpp | 16 ++++++++++++ .../ASTMatchers/ASTMatchersTraversalTest.cpp | 25 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 61e524793ec70..1426e569eabe1 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -468,6 +468,8 @@ template class RecursiveASTVisitor { DEF_TRAVERSE_TMPL_INST(Function) #undef DEF_TRAVERSE_TMPL_INST + bool dataTraverseNode(Stmt *S, DataRecursionQueue *Queue); + private: // These are helper methods used by more than one Traverse* method. 
bool TraverseTemplateParameterListHelper(TemplateParameterList *TPL); @@ -497,7 +499,6 @@ template class RecursiveASTVisitor { bool VisitOMPClauseWithPreInit(OMPClauseWithPreInit *Node); bool VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *Node); - bool dataTraverseNode(Stmt *S, DataRecursionQueue *Queue); bool PostVisitStmt(Stmt *S); }; diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp index cc95371445242..762885fa00527 100644 --- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp +++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp @@ -463,6 +463,22 @@ class MatchASTVisitor : public RecursiveASTVisitor, bool TraverseConstructorInitializer(CXXCtorInitializer *CtorInit); bool TraverseTemplateArgumentLoc(TemplateArgumentLoc TAL); + bool dataTraverseNode(Stmt *S, DataRecursionQueue *Queue) { + if (auto *RF = dyn_cast(S)) { + for (auto *SubStmt : RF->children()) { + if (SubStmt == RF->getInit() || SubStmt == RF->getLoopVarStmt() || + SubStmt == RF->getRangeInit() || SubStmt == RF->getBody()) { + TraverseStmt(SubStmt, Queue); + } else { + ASTNodeNotSpelledInSourceScope RAII(this, true); + TraverseStmt(SubStmt, Queue); + } + } + return true; + } + return RecursiveASTVisitor::dataTraverseNode(S, Queue); + } + // Matches children or descendants of 'Node' with 'BaseMatcher'. bool memoizedMatchesRecursively(const DynTypedNode &Node, ASTContext &Ctx, const DynTypedMatcher &Matcher, diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index 10d2d6ec3916a..a3a3a911b85c2 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -2580,6 +2580,31 @@ struct CtorInitsNonTrivial : NonTrivial EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M))); EXPECT_TRUE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M))); } + { + auto M = binaryOperator(hasOperatorName("!=")); + EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M))); + EXPECT_FALSE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M))); + } + { + auto M = unaryOperator(hasOperatorName("++")); + EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M))); + EXPECT_FALSE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M))); + } + { + auto M = declStmt(hasSingleDecl(varDecl(matchesName("__range")))); + EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M))); + EXPECT_FALSE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M))); + } + { + auto M = declStmt(hasSingleDecl(varDecl(matchesName("__begin")))); + EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M))); + EXPECT_FALSE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M))); + } + { + auto M = declStmt(hasSingleDecl(varDecl(matchesName("__end")))); + EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M))); + EXPECT_FALSE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M))); + } Code = R"cpp( void rangeFor() From b9b62c28677d2c812604e29bab27c1e2a2144e4b Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 22 Dec 2020 12:10:43 +0000 Subject: [PATCH 087/378] [AArch64] Add a test for MachineLICM SinkIntoLoop. NFC. 
--- .../AArch64/machine-licm-sink-instr.ll | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll new file mode 100644 index 0000000000000..f8d53a574dd2f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -sink-insts-to-avoid-spills | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +@A = external dso_local global [100 x i32], align 4 + +define i32 @sink_load_and_copy(i32 %n) { +; CHECK-LABEL: sink_load_and_copy: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: cmp w0, #1 // =1 +; CHECK-NEXT: b.lt .LBB0_3 +; CHECK-NEXT: // %bb.1: // %for.body.preheader +; CHECK-NEXT: adrp x8, A +; CHECK-NEXT: ldr w21, [x8, :lo12:A] +; CHECK-NEXT: mov w20, w19 +; CHECK-NEXT: .LBB0_2: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov w0, w21 +; CHECK-NEXT: bl _Z3usei +; CHECK-NEXT: subs w19, w19, #1 // =1 +; CHECK-NEXT: sdiv w20, w20, w0 +; CHECK-NEXT: b.ne .LBB0_2 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: mov w20, w19 +; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %cmp63 = icmp sgt i32 %n, 0 + br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 + br label %for.body + +for.cond.cleanup: + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] + ret i32 %sum.0.lcssa + +for.body: + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %call = tail call i32 @_Z3usei(i32 %0) + %div = sdiv i32 %sum.065, %call + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define i32 @cant_sink_successive_call(i32 %n) { +; CHECK-LABEL: cant_sink_successive_call: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: cmp w0, #1 // =1 +; CHECK-NEXT: b.lt .LBB1_3 +; CHECK-NEXT: // %bb.1: // %for.body.preheader +; CHECK-NEXT: adrp x8, A +; CHECK-NEXT: ldr w20, [x8, :lo12:A] +; CHECK-NEXT: mov w0, w19 +; CHECK-NEXT: bl _Z3usei +; CHECK-NEXT: mov w21, w19 +; CHECK-NEXT: .LBB1_2: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: bl _Z3usei +; CHECK-NEXT: subs w19, w19, #1 // =1 +; CHECK-NEXT: sdiv w21, w21, w0 +; CHECK-NEXT: b.ne .LBB1_2 +; CHECK-NEXT: b .LBB1_4 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: mov w21, w19 +; CHECK-NEXT: .LBB1_4: // %for.cond.cleanup +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov w0, w21 +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %cmp63 = icmp sgt i32 %n, 0 + br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 + %call0 = tail call i32 @_Z3usei(i32 %n) + br label %for.body + +for.cond.cleanup: + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] + ret i32 %sum.0.lcssa + +for.body: + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %call = tail call i32 @_Z3usei(i32 %0) + %div = sdiv i32 %sum.065, %call + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define i32 @cant_sink_successive_store(i32* nocapture readnone %store, i32 %n) { +; CHECK-LABEL: cant_sink_successive_store: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: cmp w1, #1 // =1 +; CHECK-NEXT: b.lt .LBB2_3 +; CHECK-NEXT: // %bb.1: // %for.body.preheader +; CHECK-NEXT: adrp x8, A +; CHECK-NEXT: ldr w20, [x8, :lo12:A] +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w21, w19 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: .LBB2_2: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: bl _Z3usei +; CHECK-NEXT: subs w19, w19, #1 // =1 +; CHECK-NEXT: sdiv w21, w21, w0 +; CHECK-NEXT: b.ne .LBB2_2 +; CHECK-NEXT: b .LBB2_4 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: mov w21, w19 +; CHECK-NEXT: .LBB2_4: // %for.cond.cleanup +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov w0, w21 +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %cmp63 = icmp sgt i32 %n, 0 + br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 + store i32 42, i32* %store, align 4 + br label %for.body + +for.cond.cleanup: + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] + ret i32 %sum.0.lcssa + +for.body: + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %call = tail call i32 @_Z3usei(i32 %0) + %div = sdiv i32 %sum.065, %call + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +declare i32 @_Z3usei(i32) From c4fc8a21d1dbfd2e269a59533808913b46faa8ce Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Tue, 22 Dec 2020 12:44:57 +0000 Subject: [PATCH 088/378] [clang-format] NFC keep the code clang-formatted --- clang/lib/Format/TokenAnnotator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 clang/lib/Format/TokenAnnotator.cpp diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp old mode 100644 new mode 100755 index 821b46b75f6bc..733ca1e0e8526 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1964,9 +1964,9 @@ class AnnotatingParser { if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, tok::comma, tok::semi, tok::kw_return, tok::colon, - tok::kw_co_return, tok::kw_co_await, tok::kw_co_yield, - tok::equal, tok::kw_delete, tok::kw_sizeof, - tok::kw_throw) || + tok::kw_co_return, tok::kw_co_await, + tok::kw_co_yield, tok::equal, tok::kw_delete, + tok::kw_sizeof, tok::kw_throw) || PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, TT_UnaryOperator, TT_CastRParen)) return TT_UnaryOperator; From 44e74c75e614af453f4824cb9bf1f0056d7cf426 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Tue, 22 Dec 2020 11:07:58 +0000 Subject: [PATCH 089/378] [flang][driver] Refactor unit tests for frontend actions (nfc) These patch implements a few non-functional-changes: * switch to using test fixtures for better code sharing * rename some variables (e.g. to communicate their purpose a bit better) This patch doesn't change _what_ is being tested. 
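The shape of the refactoring, reduced to a sketch (the class and test names are taken
from the diff below; the empty bodies are placeholders, not part of this patch):

```
#include "gtest/gtest.h"

class FrontendActionTest : public ::testing::Test {
protected:
  std::string inputFilePath_;   // per-test input file shared by all tests
  void SetUp() override {}      // create and open the test-specific input file
  void TearDown() override {}   // delete the input file, clear output files
};

TEST_F(FrontendActionTest, PrintPreprocessedInput) {
  // Each test now only populates the input and checks the action's output.
}
```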
Differential Revision: https://reviews.llvm.org/D93544 --- .../unittests/Frontend/FrontendActionTest.cpp | 178 +++++++++--------- 1 file changed, 90 insertions(+), 88 deletions(-) diff --git a/flang/unittests/Frontend/FrontendActionTest.cpp b/flang/unittests/Frontend/FrontendActionTest.cpp index b49b7312525a2..fb4de6d307cd8 100644 --- a/flang/unittests/Frontend/FrontendActionTest.cpp +++ b/flang/unittests/Frontend/FrontendActionTest.cpp @@ -8,6 +8,7 @@ #include "gtest/gtest.h" #include "flang/Frontend/CompilerInstance.h" +#include "flang/Frontend/CompilerInvocation.h" #include "flang/Frontend/FrontendOptions.h" #include "flang/FrontendTool/Utils.h" #include "llvm/Support/FileSystem.h" @@ -17,119 +18,120 @@ using namespace Fortran::frontend; namespace { -TEST(FrontendAction, PrintPreprocessedInput) { - std::string inputFile = "pp-test-file.f"; - std::error_code ec; +class FrontendActionTest : public ::testing::Test { +protected: + // AllSources (which is used to manage files inside every compiler + // instance), works with paths. So we need a filename and a path for the + // input file. + // TODO: We could use `-` for inputFilePath_, but then we'd need a way to + // write to stdin that's then read by AllSources. Ideally, AllSources should + // be capable of reading from any stream. + std::string inputFileName_; + std::string inputFilePath_; + // The output stream for the input file. Use this to populate the input. + std::unique_ptr inputFileOs_; + + std::error_code ec_; + + CompilerInstance compInst_; + std::shared_ptr invocation_; + + void SetUp() override { + // Generate a unique test file name. + const testing::TestInfo *const test_info = + testing::UnitTest::GetInstance()->current_test_info(); + inputFileName_ = std::string(test_info->name()) + "_test-file.f"; + + // Create the input file stream. Note that this stream is populated + // separately in every test (i.e. the input is test specific). + inputFileOs_ = std::make_unique( + inputFileName_, ec_, llvm::sys::fs::OF_None); + if (ec_) + FAIL() << "Failed to create the input file"; + + // Get the path of the input file. + llvm::SmallString<256> cwd; + if (std::error_code ec_ = llvm::sys::fs::current_path(cwd)) + FAIL() << "Failed to obtain the current working directory"; + inputFilePath_ = cwd.c_str(); + inputFilePath_ += "/" + inputFileName_; + + // Prepare the compiler (CompilerInvocation + CompilerInstance) + compInst_.CreateDiagnostics(); + invocation_ = std::make_shared(); + + compInst_.set_invocation(std::move(invocation_)); + compInst_.frontendOpts().inputs_.push_back( + FrontendInputFile(inputFilePath_, Language::Fortran)); + } + + void TearDown() override { + // Clear the input file. + llvm::sys::fs::remove(inputFileName_); + + // Clear the output files. + // Note that these tests use an output buffer (as opposed to an output + // file), hence there are no physical output files to delete and + // `EraseFiles` is set to `false`. Also, some actions (e.g. + // `ParseSyntaxOnly`) don't generated output. In such cases there's no + // output to clear and `ClearOutputFile` returns immediately. + compInst_.ClearOutputFiles(/*EraseFiles=*/false); + } +}; + +TEST_F(FrontendActionTest, PrintPreprocessedInput) { + // Populate the input file with the pre-defined input and flush it. + *(inputFileOs_) << "#ifdef NEW\n" + << " Program A \n" + << "#else\n" + << " Program B\n" + << "#endif"; + inputFileOs_.reset(); - // 1. 
Create the input file for the file manager - // AllSources (which is used to manage files inside every compiler instance), - // works with paths. This means that it requires a physical file. Create one. - std::unique_ptr os{ - new llvm::raw_fd_ostream(inputFile, ec, llvm::sys::fs::OF_None)}; - if (ec) - FAIL() << "Fail to create the file need by the test"; + // Set-up the action kind. + compInst_.invocation().frontendOpts().programAction_ = PrintPreprocessedInput; - // Populate the input file with the pre-defined input and flush it. - *(os) << "! test-file.F:\n" - << "#ifdef NEW\n" - << " Program A \n" - << "#else\n" - << " Program B\n" - << "#endif"; - os.reset(); - - // Get the path of the input file - llvm::SmallString<64> cwd; - if (std::error_code ec = llvm::sys::fs::current_path(cwd)) - FAIL() << "Failed to obtain the current working directory"; - std::string testFilePath(cwd.c_str()); - testFilePath += "/" + inputFile; - - // 2. Prepare the compiler (CompilerInvocation + CompilerInstance) - CompilerInstance compInst; - compInst.CreateDiagnostics(); - auto invocation = std::make_shared(); - invocation->frontendOpts().programAction_ = PrintPreprocessedInput; - - compInst.set_invocation(std::move(invocation)); - compInst.frontendOpts().inputs_.push_back( - FrontendInputFile(testFilePath, Language::Fortran)); - - // 3. Set-up the output stream. Using output buffer wrapped as an output + // Set-up the output stream. We are using output buffer wrapped as an output // stream, as opposed to an actual file (or a file descriptor). llvm::SmallVector outputFileBuffer; std::unique_ptr outputFileStream( new llvm::raw_svector_ostream(outputFileBuffer)); - compInst.set_outputStream(std::move(outputFileStream)); + compInst_.set_outputStream(std::move(outputFileStream)); - // 4. Run the earlier defined FrontendAction - bool success = ExecuteCompilerInvocation(&compInst); + // Execute the action. + bool success = ExecuteCompilerInvocation(&compInst_); - // 5. Validate the expected output + // Validate the expected output. EXPECT_TRUE(success); EXPECT_TRUE(!outputFileBuffer.empty()); EXPECT_TRUE( llvm::StringRef(outputFileBuffer.data()).startswith("program b\n")); - - // 6. Clear the input and the output files. Since we used an output buffer, - // there are no physical output files to delete. - llvm::sys::fs::remove(inputFile); - compInst.ClearOutputFiles(/*EraseFiles=*/true); } -TEST(FrontendAction, ParseSyntaxOnly) { - std::string inputFile = "syntax-only-test-file.f"; - std::error_code ec; +TEST_F(FrontendActionTest, ParseSyntaxOnly) { + // Populate the input file with the pre-defined input and flush it. + *(inputFileOs_) << "IF (A > 0.0) IF (B < 0.0) A = LOG (A)\n" + << "END"; + inputFileOs_.reset(); - // 1. Create the input file for the file manager - // AllSources (which is used to manage files inside every compiler instance), - // works with paths. This means that it requires a physical file. Create one. - std::unique_ptr os{ - new llvm::raw_fd_ostream(inputFile, ec, llvm::sys::fs::OF_None)}; - if (ec) - FAIL() << "Fail to create the file need by the test"; + // Set-up the action kind. + compInst_.invocation().frontendOpts().programAction_ = ParseSyntaxOnly; - // Populate the input file with the pre-defined input and flush it. - *(os) << "! 
if_stmt.f90:\n" - << "IF (A > 0.0) IF (B < 0.0) A = LOG (A)\n" - << "END"; - os.reset(); - - // Get the path of the input file - llvm::SmallString<64> cwd; - if (std::error_code ec = llvm::sys::fs::current_path(cwd)) - FAIL() << "Failed to obtain the current working directory"; - std::string testFilePath(cwd.c_str()); - testFilePath += "/" + inputFile; - - // 2. Prepare the compiler (CompilerInvocation + CompilerInstance) - CompilerInstance compInst; - compInst.CreateDiagnostics(); - auto invocation = std::make_shared(); - invocation->frontendOpts().programAction_ = ParseSyntaxOnly; - - compInst.set_invocation(std::move(invocation)); - compInst.frontendOpts().inputs_.push_back( - FrontendInputFile(testFilePath, Language::Fortran)); - - // 3. Set-up the output stream for the semantic diagnostics. + // Set-up the output stream for the semantic diagnostics. llvm::SmallVector outputDiagBuffer; std::unique_ptr outputStream( new llvm::raw_svector_ostream(outputDiagBuffer)); - compInst.set_semaOutputStream(std::move(outputStream)); + compInst_.set_semaOutputStream(std::move(outputStream)); - // 4. Execute the ParseSyntaxOnly action - bool success = ExecuteCompilerInvocation(&compInst); + // Execute the action. + bool success = ExecuteCompilerInvocation(&compInst_); - // 5. Validate the expected output + // Validate the expected output. EXPECT_FALSE(success); EXPECT_TRUE(!outputDiagBuffer.empty()); EXPECT_TRUE( llvm::StringRef(outputDiagBuffer.data()) .startswith( - ":2:14: error: IF statement is not allowed in IF statement\n")); - - // 6. Clear the input files. - llvm::sys::fs::remove(inputFile); + ":1:14: error: IF statement is not allowed in IF statement\n")); } } // namespace From 63a24816f561a5d8e28ca7054892bd8602618be4 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 21 Dec 2020 14:28:09 +0100 Subject: [PATCH 090/378] [clang][cli] Implement `getAllArgValues` marshalling This infrastructure can be used ~30 more command line options. 
Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D93631 --- clang/include/clang/Driver/Options.td | 3 +- clang/lib/Frontend/CompilerInvocation.cpp | 18 ++++++++- .../Frontend/CompilerInvocationTest.cpp | 40 +++++++++++++++++++ llvm/include/llvm/Option/OptParser.td | 6 +++ 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 82c4e9399d9d5..3373984b76ae6 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1915,7 +1915,8 @@ def fsystem_module : Flag<["-"], "fsystem-module">, Flags<[CC1Option]>, MarshallingInfoFlag<"FrontendOpts.IsSystemModule">; def fmodule_map_file : Joined<["-"], "fmodule-map-file=">, Group, Flags<[NoXarchOption,CC1Option]>, MetaVarName<"">, - HelpText<"Load this module map file">; + HelpText<"Load this module map file">, + MarshallingInfoStringVector<"FrontendOpts.ModuleMapFiles">; def fmodule_file : Joined<["-"], "fmodule-file=">, Group, Flags<[NoXarchOption,CC1Option]>, MetaVarName<"[=]">, HelpText<"Specify the mapping of module name to precompiled module file, or load a module file if name is omitted.">; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index fc5fd15475995..d7c1a6ffd600f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -323,6 +323,23 @@ static Optional normalizeStringIntegral(OptSpecifier Opt, int, return Res; } +static Optional> +normalizeStringVector(OptSpecifier Opt, int, const ArgList &Args, + DiagnosticsEngine &) { + return Args.getAllArgValues(Opt); +} + +static void denormalizeStringVector(SmallVectorImpl &Args, + const char *Spelling, + CompilerInvocation::StringAllocator SA, + Option::OptionClass OptClass, + unsigned TableIndex, + const std::vector &Values) { + for (const std::string &Value : Values) { + denormalizeString(Args, Spelling, SA, OptClass, TableIndex, Value); + } +} + static Optional normalizeTriple(OptSpecifier Opt, int TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { @@ -1715,7 +1732,6 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); Opts.ASTDumpDecls = Args.hasArg(OPT_ast_dump, OPT_ast_dump_EQ); Opts.ASTDumpAll = Args.hasArg(OPT_ast_dump_all, OPT_ast_dump_all_EQ); - Opts.ModuleMapFiles = Args.getAllArgValues(OPT_fmodule_map_file); // Only the -fmodule-file= form. 
for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); diff --git a/clang/unittests/Frontend/CompilerInvocationTest.cpp b/clang/unittests/Frontend/CompilerInvocationTest.cpp index 71e8d0907fc8c..5738f70791718 100644 --- a/clang/unittests/Frontend/CompilerInvocationTest.cpp +++ b/clang/unittests/Frontend/CompilerInvocationTest.cpp @@ -18,6 +18,7 @@ using namespace llvm; using namespace clang; using ::testing::Contains; +using ::testing::HasSubstr; using ::testing::StrEq; namespace { @@ -408,6 +409,45 @@ TEST_F(CommandLineTest, JoinedEnumDefault) { ASSERT_THAT(GeneratedArgs, Not(Contains(StrEq("legacy")))); } +TEST_F(CommandLineTest, StringVectorEmpty) { + const char *Args[] = {""}; + + CompilerInvocation::CreateFromArgs(Invocation, Args, *Diags); + + ASSERT_FALSE(Diags->hasErrorOccurred()); + ASSERT_TRUE(Invocation.getFrontendOpts().ModuleMapFiles.empty()); + + Invocation.generateCC1CommandLine(GeneratedArgs, *this); + ASSERT_THAT(GeneratedArgs, Not(Contains(HasSubstr("-fmodule-map-file=")))); +} + +TEST_F(CommandLineTest, StringVectorSingle) { + const char *Args[] = {"-fmodule-map-file=a"}; + + CompilerInvocation::CreateFromArgs(Invocation, Args, *Diags); + + ASSERT_FALSE(Diags->hasErrorOccurred()); + ASSERT_EQ(Invocation.getFrontendOpts().ModuleMapFiles, + std::vector({"a"})); + + Invocation.generateCC1CommandLine(GeneratedArgs, *this); + ASSERT_EQ(count(GeneratedArgs, StringRef("-fmodule-map-file=a")), 1); +} + +TEST_F(CommandLineTest, StringVectorMultiple) { + const char *Args[] = {"-fmodule-map-file=a", "-fmodule-map-file=b"}; + + CompilerInvocation::CreateFromArgs(Invocation, Args, *Diags); + + ASSERT_FALSE(Diags->hasErrorOccurred()); + ASSERT_TRUE(Invocation.getFrontendOpts().ModuleMapFiles == + std::vector({"a", "b"})); + + Invocation.generateCC1CommandLine(GeneratedArgs, *this); + ASSERT_EQ(count(GeneratedArgs, StringRef("-fmodule-map-file=a")), 1); + ASSERT_EQ(count(GeneratedArgs, StringRef("-fmodule-map-file=b")), 1); +} + // Tree of boolean options that can be (directly or transitively) implied by // their parent: // diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td index d7d4e03b15f07..9addaa781caa4 100644 --- a/llvm/include/llvm/Option/OptParser.td +++ b/llvm/include/llvm/Option/OptParser.td @@ -167,6 +167,12 @@ class MarshallingInfoStringInt + : MarshallingInfo({})"> { + code Normalizer = "normalizeStringVector"; + code Denormalizer = "denormalizeStringVector"; +} + class MarshallingInfoFlag : MarshallingInfo { code Normalizer = "normalizeSimpleFlag"; From 31b67d2debd43b9854248b853a52ea989c7322a0 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Tue, 22 Dec 2020 13:38:13 +0000 Subject: [PATCH 091/378] [flang][driver] Fix formatting in a test (nfc) --- flang/test/Frontend/print-preprocessed-file.f90 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/flang/test/Frontend/print-preprocessed-file.f90 b/flang/test/Frontend/print-preprocessed-file.f90 index 2ba4a4f957c65..873bb2b3e3700 100644 --- a/flang/test/Frontend/print-preprocessed-file.f90 +++ b/flang/test/Frontend/print-preprocessed-file.f90 @@ -8,15 +8,13 @@ ! RUN: %flang-new -E %s 2>&1 | FileCheck %s !----------------------------------------- -! FRONTEND FLANG DRIVER (flang-new -fc1) +! FRONTEND FLANG DRIVER (flang-new -fc1) !----------------------------------------- ! RUN: %flang-new -fc1 -E %s 2>&1 | FileCheck %s - !----------------------- ! EXPECTED OUTPUT !----------------------- -! flang-new -E %s ! CHECK:program a ! 
CHECK-NOT:program b ! CHECK-NEXT:x = 1 From bef9eb84b2fb17b22ca771c8c5c34a85f141168d Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 21 Dec 2020 13:50:09 +0100 Subject: [PATCH 092/378] [clang] NFC: Refactor custom class into a lambda in CompilerInvocation Change `makeFlagToValueNormalizer` so that one specialization converts all integral/enum arguments into `uint64_t` and forwards them to the more generic version. This makes it easy to replace the custom `FlagToValueNormalizer` struct with a lambda, which is the common approach in other (de)normalizers. Finally, drop custom `is_int_convertbile` in favor of `llvm::is_integral_or_enum`. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D93628 --- clang/lib/Frontend/CompilerInvocation.cpp | 35 +++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index d7c1a6ffd600f..44453ad462c44 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -157,33 +157,26 @@ static void denormalizeSimpleFlag(SmallVectorImpl &Args, Args.push_back(Spelling); } -namespace { -template struct FlagToValueNormalizer { - T Value; +template static constexpr bool is_uint64_t_convertible() { + return !std::is_same::value && + llvm::is_integral_or_enum::value; +} - Optional operator()(OptSpecifier Opt, unsigned, const ArgList &Args, - DiagnosticsEngine &) { +template (), bool> = false> +static auto makeFlagToValueNormalizer(T Value) { + return [Value](OptSpecifier Opt, unsigned, const ArgList &Args, + DiagnosticsEngine &) -> Optional { if (Args.hasArg(Opt)) return Value; return None; - } -}; -} // namespace - -template static constexpr bool is_int_convertible() { - return sizeof(T) <= sizeof(uint64_t) && - std::is_trivially_constructible::value && - std::is_trivially_constructible::value; -} - -template (), bool> = false> -static FlagToValueNormalizer makeFlagToValueNormalizer(T Value) { - return FlagToValueNormalizer{Value}; + }; } -template (), bool> = false> -static FlagToValueNormalizer makeFlagToValueNormalizer(T Value) { - return FlagToValueNormalizer{std::move(Value)}; +template (), bool> = false> +static auto makeFlagToValueNormalizer(T Value) { + return makeFlagToValueNormalizer(uint64_t(Value)); } static auto makeBooleanOptionNormalizer(bool Value, bool OtherValue, From 5b37f0d97087c39ef635b3f7574ace9aa173d417 Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Tue, 15 Dec 2020 12:47:27 -0500 Subject: [PATCH 093/378] [MCInstrDesc] [TableGen] Reduce size of MCOperandInfo instances. Differential Revision: https://reviews.llvm.org/D93326 --- llvm/include/llvm/MC/MCInstrDesc.h | 31 +++++++++++++++--------- llvm/utils/TableGen/InstrInfoEmitter.cpp | 5 ++-- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h index 17454e3134a2f..cbb061fc64569 100644 --- a/llvm/include/llvm/MC/MCInstrDesc.h +++ b/llvm/include/llvm/MC/MCInstrDesc.h @@ -27,12 +27,22 @@ class MCInst; //===----------------------------------------------------------------------===// namespace MCOI { -// Operand constraints +/// Operand constraints. These are encoded in 16 bits with one of the +/// low-order 3 bits specifying that a constraint is present and the +/// corresponding high-order hex digit specifying the constraint value. +/// This allows for a maximum of 3 constraints. 
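// As a worked illustration of this packed layout (the operand index 3 is
// chosen for the example only): MCOI_TIED_TO(3), defined below, expands to
//   (1 << MCOI::TIED_TO) | (3 << (4 + MCOI::TIED_TO * 4)) == 0x0031,
// i.e. presence bit 0 is set and the constraint value 3 occupies the nibble
// starting at bit 4. getOperandConstraint(OpNum, MCOI::TIED_TO) then recovers
// the value as (0x0031 >> (4 + MCOI::TIED_TO * 4)) & 0x0f == 3.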
enum OperandConstraint { - TIED_TO = 0, // Must be allocated the same register as. - EARLY_CLOBBER // Operand is an early clobber register operand + TIED_TO = 0, // Must be allocated the same register as specified value. + EARLY_CLOBBER // If present, operand is an early clobber register. }; +// Define a macro to produce each constraint value. +#define MCOI_TIED_TO(op) \ + ((1 << MCOI::TIED_TO) | ((op) << (4 + MCOI::TIED_TO * 4))) + +#define MCOI_EARLY_CLOBBER \ + (1 << MCOI::EARLY_CLOBBER) + /// These are flags set on operands, but should be considered /// private, all access should go through the MCOperandInfo accessors. /// See the accessors for a description of what these are. @@ -84,10 +94,9 @@ class MCOperandInfo { /// Information about the type of the operand. uint8_t OperandType; - /// The lower 16 bits are used to specify which constraints are set. - /// The higher 16 bits are used to specify the value of constraints (4 bits - /// each). - uint32_t Constraints; + + /// Operand constraints (see OperandConstraint enum). + uint16_t Constraints; /// Set if this operand is a pointer value and it requires a callback /// to look up its register class. @@ -197,14 +206,14 @@ class MCInstrDesc { const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands - /// Returns the value of the specific constraint if - /// it is set. Returns -1 if it is not set. + /// Returns the value of the specified operand constraint if + /// it is present. Returns -1 if it is not present. int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const { if (OpNum < NumOperands && (OpInfo[OpNum].Constraints & (1 << Constraint))) { - unsigned Pos = 16 + Constraint * 4; - return (int)(OpInfo[OpNum].Constraints >> Pos) & 0xf; + unsigned ValuePos = 4 + Constraint * 4; + return (int)(OpInfo[OpNum].Constraints >> ValuePos) & 0x0f; } return -1; } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 156fa6d18d2ee..71d8eadaa61e4 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -182,11 +182,10 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) { if (Constraint.isNone()) Res += "0"; else if (Constraint.isEarlyClobber()) - Res += "(1 << MCOI::EARLY_CLOBBER)"; + Res += "MCOI_EARLY_CLOBBER"; else { assert(Constraint.isTied()); - Res += "((" + utostr(Constraint.getTiedOperand()) + - " << 16) | (1 << MCOI::TIED_TO))"; + Res += "MCOI_TIED_TO(" + utostr(Constraint.getTiedOperand()) + ")"; } Result.push_back(Res); From ef4dbb2b7a85b47bfd84188bd1c6a9eddc5c536b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 22 Dec 2020 14:44:09 +0000 Subject: [PATCH 094/378] [LV] Use ScalarEvolution::getURemExpr to reduce duplication. ScalarEvolution should be able to handle both constant and variable trip counts using getURemExpr, so we do not have to handle them separately. This is a small simplification of a56280094e08. 
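For reference, the whole check now reduces to a single SCEV urem query; a
minimal sketch of the pattern (a paraphrase of the new computeMaxVF code,
not a drop-in snippet):

  // Trip count as a SCEV expression: backedge-taken count + 1.
  ScalarEvolution *SE = PSE.getSE();
  const SCEV *BTC = PSE.getBackedgeTakenCount();
  const SCEV *TC = SE->getAddExpr(BTC, SE->getOne(BTC->getType()));
  // Is the trip count provably a multiple of MaxVF * UserIC?  This single
  // query subsumes the previous constant-trip-count and trailing-zeros
  // special cases.
  const SCEV *Rem = SE->getURemExpr(
      TC, SE->getConstant(BTC->getType(), MaxVFtimesIC));
  if (Rem->isZero()) {
    // No scalar tail remains for any chosen VF; tail folding is unnecessary.
  }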
Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D93677 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0b9e660c987a5..6ab8e5884a76b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5506,20 +5506,15 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { "MaxVF must be a power of 2"); unsigned MaxVFtimesIC = UserIC ? MaxVF.getFixedValue() * UserIC : MaxVF.getFixedValue(); - if (TC > 0 && TC % MaxVFtimesIC == 0) { - // Accept MaxVF if we do not have a tail. - LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); - return MaxVF; - } - // Avoid tail folding if the trip count is known to be a multiple of any VF we // chose. ScalarEvolution *SE = PSE.getSE(); const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); const SCEV *ExitCount = SE->getAddExpr( BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); - unsigned TCisMultipleOf = 1 << SE->GetMinTrailingZeros(ExitCount); - if (TCisMultipleOf % MaxVFtimesIC == 0) { + const SCEV *Rem = SE->getURemExpr( + ExitCount, SE->getConstant(BackedgeTakenCount->getType(), MaxVFtimesIC)); + if (Rem->isZero()) { // Accept MaxVF if we do not have a tail. LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); return MaxVF; From 28b00ba731160f882f4e60e4a71896d5b878b52d Mon Sep 17 00:00:00 2001 From: clementval Date: Tue, 22 Dec 2020 09:59:50 -0500 Subject: [PATCH 095/378] [openacc][openmp][NFC] Fix typo in comments --- llvm/utils/TableGen/DirectiveEmitter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index 6dee193899968..9e8aa4d9f74fb 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -649,7 +649,7 @@ void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang, } } -// Generate the implemenation section for the enumeration in the directive +// Generate the implementation section for the enumeration in the directive // language void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang, raw_ostream &OS) { @@ -733,7 +733,7 @@ void GenerateClauseClassMacro(const DirectiveLanguage &DirLang, OS << "#undef CLAUSE\n"; } -// Generate the implemenation section for the enumeration in the directive +// Generate the implementation section for the enumeration in the directive // language. void EmitDirectivesGen(RecordKeeper &Records, raw_ostream &OS) { const auto DirLang = DirectiveLanguage{Records}; @@ -745,7 +745,7 @@ void EmitDirectivesGen(RecordKeeper &Records, raw_ostream &OS) { GenerateClauseClassMacro(DirLang, OS); } -// Generate the implemenation for the enumeration in the directive +// Generate the implementation for the enumeration in the directive // language. This code can be included in library. void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { const auto DirLang = DirectiveLanguage{Records}; From 0586f048d794673867f7307898114a111f6e80ba Mon Sep 17 00:00:00 2001 From: Nandor Licker Date: Wed, 25 Nov 2020 14:01:19 +0000 Subject: [PATCH 096/378] [RISCV] Basic jump table lowering This patch enables jump table lowering in the RISC-V backend. 
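With the minimum entry count now set to 5, a dense switch with enough cases
becomes a jump-table candidate. As an illustration, here is a hypothetical
C++ source-level equivalent of the above_threshold test added below; the
four-case below_threshold test keeps the compare-and-branch lowering:

  // Hypothetical C++ source; the IR it produces matches the shape of the
  // above_threshold test case.  Six dense cases exceed the new
  // setMinimumJumpTableEntries(5) threshold, so the switch can now be
  // lowered through a .LJTI-style jump table on RISC-V.
  void above_threshold(unsigned in, int *out) {
    switch (in) {
    case 1: *out = 4;   break;
    case 2: *out = 3;   break;
    case 3: *out = 2;   break;
    case 4: *out = 1;   break;
    case 5: *out = 100; break;
    case 6: *out = 200; break;
    }
  }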
In addition to the test case included, the new lowering was tested by compiling the OCaml runtime and running it under qemu. Differential Revision: https://reviews.llvm.org/D92097 --- .../llvm/CodeGen/MachineInstrBuilder.h | 3 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 18 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 + llvm/lib/Target/RISCV/RISCVMCInstLower.cpp | 3 + llvm/test/CodeGen/RISCV/jumptable.ll | 349 ++++++++++++++++-- 5 files changed, 338 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index b31e9cdb0e903..115c501756043 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -305,6 +305,9 @@ class MachineInstrBuilder { case MachineOperand::MO_BlockAddress: return addBlockAddress(Disp.getBlockAddress(), Disp.getOffset() + off, TargetFlags); + case MachineOperand::MO_JumpTableIndex: + assert(off == 0 && "cannot create offset into jump tables"); + return addJumpTableIndex(Disp.getIndex(), TargetFlags); } } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5334666baf228..22fe3688a46b7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -323,6 +323,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::GlobalAddress, XLenVT, Custom); setOperationAction(ISD::BlockAddress, XLenVT, Custom); setOperationAction(ISD::ConstantPool, XLenVT, Custom); + setOperationAction(ISD::JumpTable, XLenVT, Custom); setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); @@ -367,8 +368,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setMinFunctionAlignment(FunctionAlignment); setPrefFunctionAlignment(FunctionAlignment); - // Effectively disable jump table generation. 
- setMinimumJumpTableEntries(INT_MAX); + setMinimumJumpTableEntries(5); // Jumps are expensive, compared to logic setJumpIsExpensive(); @@ -565,6 +565,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerBlockAddress(Op, DAG); case ISD::ConstantPool: return lowerConstantPool(Op, DAG); + case ISD::JumpTable: + return lowerJumpTable(Op, DAG); case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: @@ -643,6 +645,11 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, N->getOffset(), Flags); } +static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); +} + template SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal) const { @@ -720,6 +727,13 @@ SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, return getAddr(N, DAG); } +SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { + JumpTableSDNode *N = cast(Op); + + return getAddr(N, DAG); +} + SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, bool UseGOT) const { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 28ce459e30278..dabf2b199a884 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -257,6 +257,7 @@ class RISCVTargetLowering : public TargetLowering { SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp index 4fd060a650140..a93a1e38c6569 100644 --- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -122,6 +122,9 @@ bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, case MachineOperand::MO_ConstantPoolIndex: MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP); break; + case MachineOperand::MO_JumpTableIndex: + MCOp = lowerSymbolOperand(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; } return true; } diff --git a/llvm/test/CodeGen/RISCV/jumptable.ll b/llvm/test/CodeGen/RISCV/jumptable.ll index 5a5f5b65111a1..0eebc698759d3 100644 --- a/llvm/test/CodeGen/RISCV/jumptable.ll +++ b/llvm/test/CodeGen/RISCV/jumptable.ll @@ -1,39 +1,145 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-SMALL +; RUN: llc -mtriple=riscv32 -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-MEDIUM +; RUN: llc -mtriple=riscv64 -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I-SMALL +; RUN: llc -mtriple=riscv64 -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I-MEDIUM -define void @jt(i32 %in, i32* %out) nounwind { -; RV32I-LABEL: jt: -; RV32I: # %bb.0: # %entry 
-; RV32I-NEXT: addi a2, zero, 2 -; RV32I-NEXT: blt a2, a0, .LBB0_4 -; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: addi a2, zero, 1 -; RV32I-NEXT: beq a0, a2, .LBB0_7 -; RV32I-NEXT: # %bb.2: # %entry -; RV32I-NEXT: addi a2, zero, 2 -; RV32I-NEXT: bne a0, a2, .LBB0_10 -; RV32I-NEXT: # %bb.3: # %bb2 -; RV32I-NEXT: addi a0, zero, 3 -; RV32I-NEXT: j .LBB0_9 -; RV32I-NEXT: .LBB0_4: # %entry -; RV32I-NEXT: addi a2, zero, 3 -; RV32I-NEXT: beq a0, a2, .LBB0_8 -; RV32I-NEXT: # %bb.5: # %entry -; RV32I-NEXT: addi a2, zero, 4 -; RV32I-NEXT: bne a0, a2, .LBB0_10 -; RV32I-NEXT: # %bb.6: # %bb4 -; RV32I-NEXT: addi a0, zero, 1 -; RV32I-NEXT: j .LBB0_9 -; RV32I-NEXT: .LBB0_7: # %bb1 -; RV32I-NEXT: addi a0, zero, 4 -; RV32I-NEXT: j .LBB0_9 -; RV32I-NEXT: .LBB0_8: # %bb3 -; RV32I-NEXT: addi a0, zero, 2 -; RV32I-NEXT: .LBB0_9: # %exit -; RV32I-NEXT: sw a0, 0(a1) -; RV32I-NEXT: .LBB0_10: # %exit -; RV32I-NEXT: ret +define void @below_threshold(i32 %in, i32* %out) nounwind { +; RV32I-SMALL-LABEL: below_threshold: +; RV32I-SMALL: # %bb.0: # %entry +; RV32I-SMALL-NEXT: addi a2, zero, 2 +; RV32I-SMALL-NEXT: blt a2, a0, .LBB0_4 +; RV32I-SMALL-NEXT: # %bb.1: # %entry +; RV32I-SMALL-NEXT: addi a2, zero, 1 +; RV32I-SMALL-NEXT: beq a0, a2, .LBB0_7 +; RV32I-SMALL-NEXT: # %bb.2: # %entry +; RV32I-SMALL-NEXT: addi a2, zero, 2 +; RV32I-SMALL-NEXT: bne a0, a2, .LBB0_10 +; RV32I-SMALL-NEXT: # %bb.3: # %bb2 +; RV32I-SMALL-NEXT: addi a0, zero, 3 +; RV32I-SMALL-NEXT: j .LBB0_9 +; RV32I-SMALL-NEXT: .LBB0_4: # %entry +; RV32I-SMALL-NEXT: addi a2, zero, 3 +; RV32I-SMALL-NEXT: beq a0, a2, .LBB0_8 +; RV32I-SMALL-NEXT: # %bb.5: # %entry +; RV32I-SMALL-NEXT: addi a2, zero, 4 +; RV32I-SMALL-NEXT: bne a0, a2, .LBB0_10 +; RV32I-SMALL-NEXT: # %bb.6: # %bb4 +; RV32I-SMALL-NEXT: addi a0, zero, 1 +; RV32I-SMALL-NEXT: j .LBB0_9 +; RV32I-SMALL-NEXT: .LBB0_7: # %bb1 +; RV32I-SMALL-NEXT: addi a0, zero, 4 +; RV32I-SMALL-NEXT: j .LBB0_9 +; RV32I-SMALL-NEXT: .LBB0_8: # %bb3 +; RV32I-SMALL-NEXT: addi a0, zero, 2 +; RV32I-SMALL-NEXT: .LBB0_9: # %exit +; RV32I-SMALL-NEXT: sw a0, 0(a1) +; RV32I-SMALL-NEXT: .LBB0_10: # %exit +; RV32I-SMALL-NEXT: ret +; +; RV32I-MEDIUM-LABEL: below_threshold: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: addi a2, zero, 2 +; RV32I-MEDIUM-NEXT: blt a2, a0, .LBB0_4 +; RV32I-MEDIUM-NEXT: # %bb.1: # %entry +; RV32I-MEDIUM-NEXT: addi a2, zero, 1 +; RV32I-MEDIUM-NEXT: beq a0, a2, .LBB0_7 +; RV32I-MEDIUM-NEXT: # %bb.2: # %entry +; RV32I-MEDIUM-NEXT: addi a2, zero, 2 +; RV32I-MEDIUM-NEXT: bne a0, a2, .LBB0_10 +; RV32I-MEDIUM-NEXT: # %bb.3: # %bb2 +; RV32I-MEDIUM-NEXT: addi a0, zero, 3 +; RV32I-MEDIUM-NEXT: j .LBB0_9 +; RV32I-MEDIUM-NEXT: .LBB0_4: # %entry +; RV32I-MEDIUM-NEXT: addi a2, zero, 3 +; RV32I-MEDIUM-NEXT: beq a0, a2, .LBB0_8 +; RV32I-MEDIUM-NEXT: # %bb.5: # %entry +; RV32I-MEDIUM-NEXT: addi a2, zero, 4 +; RV32I-MEDIUM-NEXT: bne a0, a2, .LBB0_10 +; RV32I-MEDIUM-NEXT: # %bb.6: # %bb4 +; RV32I-MEDIUM-NEXT: addi a0, zero, 1 +; RV32I-MEDIUM-NEXT: j .LBB0_9 +; RV32I-MEDIUM-NEXT: .LBB0_7: # %bb1 +; RV32I-MEDIUM-NEXT: addi a0, zero, 4 +; RV32I-MEDIUM-NEXT: j .LBB0_9 +; RV32I-MEDIUM-NEXT: .LBB0_8: # %bb3 +; RV32I-MEDIUM-NEXT: addi a0, zero, 2 +; RV32I-MEDIUM-NEXT: .LBB0_9: # %exit +; RV32I-MEDIUM-NEXT: sw a0, 0(a1) +; RV32I-MEDIUM-NEXT: .LBB0_10: # %exit +; RV32I-MEDIUM-NEXT: ret +; +; RV64I-SMALL-LABEL: below_threshold: +; RV64I-SMALL: # %bb.0: # %entry +; RV64I-SMALL-NEXT: slli a0, a0, 32 +; RV64I-SMALL-NEXT: srli a0, a0, 32 +; RV64I-SMALL-NEXT: addi a2, zero, 2 +; RV64I-SMALL-NEXT: blt a2, a0, .LBB0_4 +; 
RV64I-SMALL-NEXT: # %bb.1: # %entry +; RV64I-SMALL-NEXT: addi a2, zero, 1 +; RV64I-SMALL-NEXT: beq a0, a2, .LBB0_7 +; RV64I-SMALL-NEXT: # %bb.2: # %entry +; RV64I-SMALL-NEXT: addi a2, zero, 2 +; RV64I-SMALL-NEXT: bne a0, a2, .LBB0_10 +; RV64I-SMALL-NEXT: # %bb.3: # %bb2 +; RV64I-SMALL-NEXT: addi a0, zero, 3 +; RV64I-SMALL-NEXT: j .LBB0_9 +; RV64I-SMALL-NEXT: .LBB0_4: # %entry +; RV64I-SMALL-NEXT: addi a2, zero, 3 +; RV64I-SMALL-NEXT: beq a0, a2, .LBB0_8 +; RV64I-SMALL-NEXT: # %bb.5: # %entry +; RV64I-SMALL-NEXT: addi a2, zero, 4 +; RV64I-SMALL-NEXT: bne a0, a2, .LBB0_10 +; RV64I-SMALL-NEXT: # %bb.6: # %bb4 +; RV64I-SMALL-NEXT: addi a0, zero, 1 +; RV64I-SMALL-NEXT: j .LBB0_9 +; RV64I-SMALL-NEXT: .LBB0_7: # %bb1 +; RV64I-SMALL-NEXT: addi a0, zero, 4 +; RV64I-SMALL-NEXT: j .LBB0_9 +; RV64I-SMALL-NEXT: .LBB0_8: # %bb3 +; RV64I-SMALL-NEXT: addi a0, zero, 2 +; RV64I-SMALL-NEXT: .LBB0_9: # %exit +; RV64I-SMALL-NEXT: sw a0, 0(a1) +; RV64I-SMALL-NEXT: .LBB0_10: # %exit +; RV64I-SMALL-NEXT: ret +; +; RV64I-MEDIUM-LABEL: below_threshold: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: slli a0, a0, 32 +; RV64I-MEDIUM-NEXT: srli a0, a0, 32 +; RV64I-MEDIUM-NEXT: addi a2, zero, 2 +; RV64I-MEDIUM-NEXT: blt a2, a0, .LBB0_4 +; RV64I-MEDIUM-NEXT: # %bb.1: # %entry +; RV64I-MEDIUM-NEXT: addi a2, zero, 1 +; RV64I-MEDIUM-NEXT: beq a0, a2, .LBB0_7 +; RV64I-MEDIUM-NEXT: # %bb.2: # %entry +; RV64I-MEDIUM-NEXT: addi a2, zero, 2 +; RV64I-MEDIUM-NEXT: bne a0, a2, .LBB0_10 +; RV64I-MEDIUM-NEXT: # %bb.3: # %bb2 +; RV64I-MEDIUM-NEXT: addi a0, zero, 3 +; RV64I-MEDIUM-NEXT: j .LBB0_9 +; RV64I-MEDIUM-NEXT: .LBB0_4: # %entry +; RV64I-MEDIUM-NEXT: addi a2, zero, 3 +; RV64I-MEDIUM-NEXT: beq a0, a2, .LBB0_8 +; RV64I-MEDIUM-NEXT: # %bb.5: # %entry +; RV64I-MEDIUM-NEXT: addi a2, zero, 4 +; RV64I-MEDIUM-NEXT: bne a0, a2, .LBB0_10 +; RV64I-MEDIUM-NEXT: # %bb.6: # %bb4 +; RV64I-MEDIUM-NEXT: addi a0, zero, 1 +; RV64I-MEDIUM-NEXT: j .LBB0_9 +; RV64I-MEDIUM-NEXT: .LBB0_7: # %bb1 +; RV64I-MEDIUM-NEXT: addi a0, zero, 4 +; RV64I-MEDIUM-NEXT: j .LBB0_9 +; RV64I-MEDIUM-NEXT: .LBB0_8: # %bb3 +; RV64I-MEDIUM-NEXT: addi a0, zero, 2 +; RV64I-MEDIUM-NEXT: .LBB0_9: # %exit +; RV64I-MEDIUM-NEXT: sw a0, 0(a1) +; RV64I-MEDIUM-NEXT: .LBB0_10: # %exit +; RV64I-MEDIUM-NEXT: ret entry: switch i32 %in, label %exit [ i32 1, label %bb1 @@ -56,3 +162,178 @@ bb4: exit: ret void } + +define void @above_threshold(i32 %in, i32* %out) nounwind { +; RV32I-SMALL-LABEL: above_threshold: +; RV32I-SMALL: # %bb.0: # %entry +; RV32I-SMALL-NEXT: addi a0, a0, -1 +; RV32I-SMALL-NEXT: addi a2, zero, 5 +; RV32I-SMALL-NEXT: bltu a2, a0, .LBB1_9 +; RV32I-SMALL-NEXT: # %bb.1: # %entry +; RV32I-SMALL-NEXT: slli a0, a0, 2 +; RV32I-SMALL-NEXT: lui a2, %hi(.LJTI1_0) +; RV32I-SMALL-NEXT: addi a2, a2, %lo(.LJTI1_0) +; RV32I-SMALL-NEXT: add a0, a0, a2 +; RV32I-SMALL-NEXT: lw a0, 0(a0) +; RV32I-SMALL-NEXT: jr a0 +; RV32I-SMALL-NEXT: .LBB1_2: # %bb1 +; RV32I-SMALL-NEXT: addi a0, zero, 4 +; RV32I-SMALL-NEXT: j .LBB1_8 +; RV32I-SMALL-NEXT: .LBB1_3: # %bb2 +; RV32I-SMALL-NEXT: addi a0, zero, 3 +; RV32I-SMALL-NEXT: j .LBB1_8 +; RV32I-SMALL-NEXT: .LBB1_4: # %bb3 +; RV32I-SMALL-NEXT: addi a0, zero, 2 +; RV32I-SMALL-NEXT: j .LBB1_8 +; RV32I-SMALL-NEXT: .LBB1_5: # %bb4 +; RV32I-SMALL-NEXT: addi a0, zero, 1 +; RV32I-SMALL-NEXT: j .LBB1_8 +; RV32I-SMALL-NEXT: .LBB1_6: # %bb5 +; RV32I-SMALL-NEXT: addi a0, zero, 100 +; RV32I-SMALL-NEXT: j .LBB1_8 +; RV32I-SMALL-NEXT: .LBB1_7: # %bb6 +; RV32I-SMALL-NEXT: addi a0, zero, 200 +; RV32I-SMALL-NEXT: .LBB1_8: # %exit +; RV32I-SMALL-NEXT: sw a0, 
0(a1) +; RV32I-SMALL-NEXT: .LBB1_9: # %exit +; RV32I-SMALL-NEXT: ret +; +; RV32I-MEDIUM-LABEL: above_threshold: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: addi a0, a0, -1 +; RV32I-MEDIUM-NEXT: addi a2, zero, 5 +; RV32I-MEDIUM-NEXT: bltu a2, a0, .LBB1_9 +; RV32I-MEDIUM-NEXT: # %bb.1: # %entry +; RV32I-MEDIUM-NEXT: slli a0, a0, 2 +; RV32I-MEDIUM-NEXT: .LBB1_10: # %entry +; RV32I-MEDIUM-NEXT: # Label of block must be emitted +; RV32I-MEDIUM-NEXT: auipc a2, %pcrel_hi(.LJTI1_0) +; RV32I-MEDIUM-NEXT: addi a2, a2, %pcrel_lo(.LBB1_10) +; RV32I-MEDIUM-NEXT: add a0, a0, a2 +; RV32I-MEDIUM-NEXT: lw a0, 0(a0) +; RV32I-MEDIUM-NEXT: jr a0 +; RV32I-MEDIUM-NEXT: .LBB1_2: # %bb1 +; RV32I-MEDIUM-NEXT: addi a0, zero, 4 +; RV32I-MEDIUM-NEXT: j .LBB1_8 +; RV32I-MEDIUM-NEXT: .LBB1_3: # %bb2 +; RV32I-MEDIUM-NEXT: addi a0, zero, 3 +; RV32I-MEDIUM-NEXT: j .LBB1_8 +; RV32I-MEDIUM-NEXT: .LBB1_4: # %bb3 +; RV32I-MEDIUM-NEXT: addi a0, zero, 2 +; RV32I-MEDIUM-NEXT: j .LBB1_8 +; RV32I-MEDIUM-NEXT: .LBB1_5: # %bb4 +; RV32I-MEDIUM-NEXT: addi a0, zero, 1 +; RV32I-MEDIUM-NEXT: j .LBB1_8 +; RV32I-MEDIUM-NEXT: .LBB1_6: # %bb5 +; RV32I-MEDIUM-NEXT: addi a0, zero, 100 +; RV32I-MEDIUM-NEXT: j .LBB1_8 +; RV32I-MEDIUM-NEXT: .LBB1_7: # %bb6 +; RV32I-MEDIUM-NEXT: addi a0, zero, 200 +; RV32I-MEDIUM-NEXT: .LBB1_8: # %exit +; RV32I-MEDIUM-NEXT: sw a0, 0(a1) +; RV32I-MEDIUM-NEXT: .LBB1_9: # %exit +; RV32I-MEDIUM-NEXT: ret +; +; RV64I-SMALL-LABEL: above_threshold: +; RV64I-SMALL: # %bb.0: # %entry +; RV64I-SMALL-NEXT: slli a0, a0, 32 +; RV64I-SMALL-NEXT: srli a0, a0, 32 +; RV64I-SMALL-NEXT: addi a0, a0, -1 +; RV64I-SMALL-NEXT: addi a2, zero, 5 +; RV64I-SMALL-NEXT: bltu a2, a0, .LBB1_9 +; RV64I-SMALL-NEXT: # %bb.1: # %entry +; RV64I-SMALL-NEXT: slli a0, a0, 3 +; RV64I-SMALL-NEXT: lui a2, %hi(.LJTI1_0) +; RV64I-SMALL-NEXT: addi a2, a2, %lo(.LJTI1_0) +; RV64I-SMALL-NEXT: add a0, a0, a2 +; RV64I-SMALL-NEXT: ld a0, 0(a0) +; RV64I-SMALL-NEXT: jr a0 +; RV64I-SMALL-NEXT: .LBB1_2: # %bb1 +; RV64I-SMALL-NEXT: addi a0, zero, 4 +; RV64I-SMALL-NEXT: j .LBB1_8 +; RV64I-SMALL-NEXT: .LBB1_3: # %bb2 +; RV64I-SMALL-NEXT: addi a0, zero, 3 +; RV64I-SMALL-NEXT: j .LBB1_8 +; RV64I-SMALL-NEXT: .LBB1_4: # %bb3 +; RV64I-SMALL-NEXT: addi a0, zero, 2 +; RV64I-SMALL-NEXT: j .LBB1_8 +; RV64I-SMALL-NEXT: .LBB1_5: # %bb4 +; RV64I-SMALL-NEXT: addi a0, zero, 1 +; RV64I-SMALL-NEXT: j .LBB1_8 +; RV64I-SMALL-NEXT: .LBB1_6: # %bb5 +; RV64I-SMALL-NEXT: addi a0, zero, 100 +; RV64I-SMALL-NEXT: j .LBB1_8 +; RV64I-SMALL-NEXT: .LBB1_7: # %bb6 +; RV64I-SMALL-NEXT: addi a0, zero, 200 +; RV64I-SMALL-NEXT: .LBB1_8: # %exit +; RV64I-SMALL-NEXT: sw a0, 0(a1) +; RV64I-SMALL-NEXT: .LBB1_9: # %exit +; RV64I-SMALL-NEXT: ret +; +; RV64I-MEDIUM-LABEL: above_threshold: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: slli a0, a0, 32 +; RV64I-MEDIUM-NEXT: srli a0, a0, 32 +; RV64I-MEDIUM-NEXT: addi a0, a0, -1 +; RV64I-MEDIUM-NEXT: addi a2, zero, 5 +; RV64I-MEDIUM-NEXT: bltu a2, a0, .LBB1_9 +; RV64I-MEDIUM-NEXT: # %bb.1: # %entry +; RV64I-MEDIUM-NEXT: slli a0, a0, 3 +; RV64I-MEDIUM-NEXT: .LBB1_10: # %entry +; RV64I-MEDIUM-NEXT: # Label of block must be emitted +; RV64I-MEDIUM-NEXT: auipc a2, %pcrel_hi(.LJTI1_0) +; RV64I-MEDIUM-NEXT: addi a2, a2, %pcrel_lo(.LBB1_10) +; RV64I-MEDIUM-NEXT: add a0, a0, a2 +; RV64I-MEDIUM-NEXT: ld a0, 0(a0) +; RV64I-MEDIUM-NEXT: jr a0 +; RV64I-MEDIUM-NEXT: .LBB1_2: # %bb1 +; RV64I-MEDIUM-NEXT: addi a0, zero, 4 +; RV64I-MEDIUM-NEXT: j .LBB1_8 +; RV64I-MEDIUM-NEXT: .LBB1_3: # %bb2 +; RV64I-MEDIUM-NEXT: addi a0, zero, 3 +; RV64I-MEDIUM-NEXT: j 
.LBB1_8 +; RV64I-MEDIUM-NEXT: .LBB1_4: # %bb3 +; RV64I-MEDIUM-NEXT: addi a0, zero, 2 +; RV64I-MEDIUM-NEXT: j .LBB1_8 +; RV64I-MEDIUM-NEXT: .LBB1_5: # %bb4 +; RV64I-MEDIUM-NEXT: addi a0, zero, 1 +; RV64I-MEDIUM-NEXT: j .LBB1_8 +; RV64I-MEDIUM-NEXT: .LBB1_6: # %bb5 +; RV64I-MEDIUM-NEXT: addi a0, zero, 100 +; RV64I-MEDIUM-NEXT: j .LBB1_8 +; RV64I-MEDIUM-NEXT: .LBB1_7: # %bb6 +; RV64I-MEDIUM-NEXT: addi a0, zero, 200 +; RV64I-MEDIUM-NEXT: .LBB1_8: # %exit +; RV64I-MEDIUM-NEXT: sw a0, 0(a1) +; RV64I-MEDIUM-NEXT: .LBB1_9: # %exit +; RV64I-MEDIUM-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + ] +bb1: + store i32 4, i32* %out + br label %exit +bb2: + store i32 3, i32* %out + br label %exit +bb3: + store i32 2, i32* %out + br label %exit +bb4: + store i32 1, i32* %out + br label %exit +bb5: + store i32 100, i32* %out + br label %exit +bb6: + store i32 200, i32* %out + br label %exit +exit: + ret void +} From 3e07b0b9d3363fb767cbbaa2593fa91ac393fb7e Mon Sep 17 00:00:00 2001 From: Prateek Gupta Date: Tue, 22 Dec 2020 21:40:17 +0530 Subject: [PATCH 097/378] [MLIR] Fix lowering of affine operations with return values This commit addresses the issue of lowering affine.for and affine.parallel having return values. Relevant test cases are also added. Signed-off-by: Prateek Gupta Differential Revision: https://reviews.llvm.org/D93090 --- .../AffineToStandard/AffineToStandard.cpp | 125 ++++++++++++++++-- .../AffineToStandard/lower-affine.mlir | 125 ++++++++++++++++++ 2 files changed, 236 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp index 58f44b6ed2078..8721e6b96ed7d 100644 --- a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp +++ b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp @@ -334,7 +334,13 @@ class AffineYieldOpLowering : public OpRewritePattern { LogicalResult matchAndRewrite(AffineYieldOp op, PatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(op); + if (isa(op.getParentOp())) { + // scf.parallel does not yield any values via its terminator scf.yield but + // models reductions differently using additional ops in its region. + rewriter.replaceOpWithNewOp(op); + return success(); + } + rewriter.replaceOpWithNewOp(op, op.operands()); return success(); } }; @@ -349,14 +355,55 @@ class AffineForLowering : public OpRewritePattern { Value lowerBound = lowerAffineLowerBound(op, rewriter); Value upperBound = lowerAffineUpperBound(op, rewriter); Value step = rewriter.create(loc, op.getStep()); - auto f = rewriter.create(loc, lowerBound, upperBound, step); - rewriter.eraseBlock(f.getBody()); - rewriter.inlineRegionBefore(op.region(), f.region(), f.region().end()); - rewriter.eraseOp(op); + auto scfForOp = rewriter.create(loc, lowerBound, upperBound, + step, op.getIterOperands()); + rewriter.eraseBlock(scfForOp.getBody()); + rewriter.inlineRegionBefore(op.region(), scfForOp.region(), + scfForOp.region().end()); + rewriter.replaceOp(op, scfForOp.results()); return success(); } }; +/// Returns the identity value associated with an AtomicRMWKind op. 
+static Value getIdentityValue(AtomicRMWKind op, OpBuilder &builder, + Location loc) { + switch (op) { + case AtomicRMWKind::addf: + return builder.create(loc, builder.getF32FloatAttr(0)); + case AtomicRMWKind::addi: + return builder.create(loc, builder.getI32IntegerAttr(0)); + case AtomicRMWKind::mulf: + return builder.create(loc, builder.getF32FloatAttr(1)); + case AtomicRMWKind::muli: + return builder.create(loc, builder.getI32IntegerAttr(1)); + // TODO: Add remaining reduction operations. + default: + emitOptionalError(loc, "Reduction operation type not supported"); + } + return nullptr; +} + +/// Return the value obtained by applying the reduction operation kind +/// associated with a binary AtomicRMWKind op to `lhs` and `rhs`. +static Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, + Value lhs, Value rhs) { + switch (op) { + case AtomicRMWKind::addf: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::addi: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::mulf: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::muli: + return builder.create(loc, lhs, rhs); + // TODO: Add remaining reduction operations. + default: + emitOptionalError(loc, "Reduction operation type not supported"); + } + return nullptr; +} + /// Convert an `affine.parallel` (loop nest) operation into a `scf.parallel` /// operation. class AffineParallelLowering : public OpRewritePattern { @@ -369,12 +416,13 @@ class AffineParallelLowering : public OpRewritePattern { SmallVector steps; SmallVector upperBoundTuple; SmallVector lowerBoundTuple; + SmallVector identityVals; // Finding lower and upper bound by expanding the map expression. // Checking if expandAffineMap is not giving NULL. - Optional> upperBound = expandAffineMap( - rewriter, loc, op.upperBoundsMap(), op.getUpperBoundsOperands()); Optional> lowerBound = expandAffineMap( rewriter, loc, op.lowerBoundsMap(), op.getLowerBoundsOperands()); + Optional> upperBound = expandAffineMap( + rewriter, loc, op.upperBoundsMap(), op.getUpperBoundsOperands()); if (!lowerBound || !upperBound) return failure(); upperBoundTuple = *upperBound; @@ -383,13 +431,62 @@ class AffineParallelLowering : public OpRewritePattern { for (Attribute step : op.steps()) steps.push_back(rewriter.create( loc, step.cast().getInt())); - // Creating empty scf.parallel op body with appropriate bounds. - auto parallelOp = rewriter.create(loc, lowerBoundTuple, - upperBoundTuple, steps); - rewriter.eraseBlock(parallelOp.getBody()); - rewriter.inlineRegionBefore(op.region(), parallelOp.region(), - parallelOp.region().end()); - rewriter.eraseOp(op); + // Get the terminator op. + Operation *affineParOpTerminator = op.getBody()->getTerminator(); + scf::ParallelOp parOp; + if (op.results().empty()) { + // Case with no reduction operations/return values. + parOp = rewriter.create(loc, lowerBoundTuple, + upperBoundTuple, steps, + /*bodyBuilderFn=*/nullptr); + rewriter.eraseBlock(parOp.getBody()); + rewriter.inlineRegionBefore(op.region(), parOp.region(), + parOp.region().end()); + rewriter.replaceOp(op, parOp.results()); + return success(); + } + // Case with affine.parallel with reduction operations/return values. + // scf.parallel handles the reduction operation differently unlike + // affine.parallel. + ArrayRef reductions = op.reductions().getValue(); + for (Attribute reduction : reductions) { + // For each of the reduction operations get the identity values for + // initialization of the result values. 
+ Optional reductionOp = symbolizeAtomicRMWKind( + static_cast(reduction.cast().getInt())); + assert(reductionOp.hasValue() && + "Reduction operation cannot be of None Type"); + AtomicRMWKind reductionOpValue = reductionOp.getValue(); + identityVals.push_back(getIdentityValue(reductionOpValue, rewriter, loc)); + } + parOp = rewriter.create( + loc, lowerBoundTuple, upperBoundTuple, steps, identityVals, + /*bodyBuilderFn=*/nullptr); + + // Copy the body of the affine.parallel op. + rewriter.eraseBlock(parOp.getBody()); + rewriter.inlineRegionBefore(op.region(), parOp.region(), + parOp.region().end()); + assert(reductions.size() == affineParOpTerminator->getNumOperands() && + "Unequal number of reductions and operands."); + for (unsigned i = 0, end = reductions.size(); i < end; i++) { + // For each of the reduction operations get the respective mlir::Value. + Optional reductionOp = + symbolizeAtomicRMWKind(reductions[i].cast().getInt()); + assert(reductionOp.hasValue() && + "Reduction Operation cannot be of None Type"); + AtomicRMWKind reductionOpValue = reductionOp.getValue(); + rewriter.setInsertionPoint(&parOp.getBody()->back()); + auto reduceOp = rewriter.create( + loc, affineParOpTerminator->getOperand(i)); + rewriter.setInsertionPointToEnd(&reduceOp.reductionOperator().front()); + Value reductionResult = + getReductionOp(reductionOpValue, rewriter, loc, + reduceOp.reductionOperator().front().getArgument(0), + reduceOp.reductionOperator().front().getArgument(1)); + rewriter.create(loc, reductionResult); + } + rewriter.replaceOp(op, parOp.results()); return success(); } }; diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir index f89d913cb64a1..38d269913e51c 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir @@ -26,6 +26,30 @@ func @simple_loop() { ///////////////////////////////////////////////////////////////////// +func @for_with_yield(%buffer: memref<1024xf32>) -> (f32) { + %sum_0 = constant 0.0 : f32 + %sum = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_0) -> (f32) { + %t = affine.load %buffer[%i] : memref<1024xf32> + %sum_next = addf %sum_iter, %t : f32 + affine.yield %sum_next : f32 + } + return %sum : f32 +} + +// CHECK-LABEL: func @for_with_yield +// CHECK: %[[INIT_SUM:.*]] = constant 0.000000e+00 : f32 +// CHECK-NEXT: %[[LOWER:.*]] = constant 0 : index +// CHECK-NEXT: %[[UPPER:.*]] = constant 10 : index +// CHECK-NEXT: %[[STEP:.*]] = constant 2 : index +// CHECK-NEXT: %[[SUM:.*]] = scf.for %[[IV:.*]] = %[[LOWER]] to %[[UPPER]] step %[[STEP]] iter_args(%[[SUM_ITER:.*]] = %[[INIT_SUM]]) -> (f32) { +// CHECK-NEXT: load +// CHECK-NEXT: %[[SUM_NEXT:.*]] = addf +// CHECK-NEXT: scf.yield %[[SUM_NEXT]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: return %[[SUM]] : f32 + +///////////////////////////////////////////////////////////////////// + func private @pre(index) -> () func private @body2(index, index) -> () func private @post(index) -> () @@ -674,3 +698,104 @@ func @affine_parallel_tiled(%o: memref<100x100xf32>, %a: memref<100x100xf32>, %b // CHECK: %[[A4:.*]] = load %[[ARG2]][%[[arg8]], %[[arg7]]] : memref<100x100xf32> // CHECK: mulf %[[A3]], %[[A4]] : f32 // CHECK: scf.yield + +///////////////////////////////////////////////////////////////////// + +func @affine_parallel_simple(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>) -> (memref<3x3xf32>) { + %O = alloc() : memref<3x3xf32> + affine.parallel (%kx, %ky) = (0, 0) to 
(2, 2) { + %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf32> + %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf32> + %3 = mulf %1, %2 : f32 + affine.store %3, %O[%kx, %ky] : memref<3x3xf32> + } + return %O : memref<3x3xf32> +} +// CHECK-LABEL: func @affine_parallel_simple +// CHECK: %[[LOWER_1:.*]] = constant 0 : index +// CHECK-NEXT: %[[LOWER_2:.*]] = constant 0 : index +// CHECK-NEXT: %[[UPPER_1:.*]] = constant 2 : index +// CHECK-NEXT: %[[UPPER_2:.*]] = constant 2 : index +// CHECK-NEXT: %[[STEP_1:.*]] = constant 1 : index +// CHECK-NEXT: %[[STEP_2:.*]] = constant 1 : index +// CHECK-NEXT: scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) { +// CHECK-NEXT: %[[VAL_1:.*]] = load +// CHECK-NEXT: %[[VAL_2:.*]] = load +// CHECK-NEXT: %[[PRODUCT:.*]] = mulf +// CHECK-NEXT: store +// CHECK-NEXT: scf.yield +// CHECK-NEXT: } +// CHECK-NEXT: return +// CHECK-NEXT: } + +///////////////////////////////////////////////////////////////////// + +func @affine_parallel_simple_dynamic_bounds(%arg0: memref, %arg1: memref, %arg2: memref) { + %c_0 = constant 0 : index + %output_dim = dim %arg0, %c_0 : memref + affine.parallel (%kx, %ky) = (%c_0, %c_0) to (%output_dim, %output_dim) { + %1 = affine.load %arg0[%kx, %ky] : memref + %2 = affine.load %arg1[%kx, %ky] : memref + %3 = mulf %1, %2 : f32 + affine.store %3, %arg2[%kx, %ky] : memref + } + return +} +// CHECK-LABEL: func @affine_parallel_simple_dynamic_bounds +// CHECK-SAME: %[[ARG_0:.*]]: memref, %[[ARG_1:.*]]: memref, %[[ARG_2:.*]]: memref +// CHECK: %[[DIM_INDEX:.*]] = constant 0 : index +// CHECK-NEXT: %[[UPPER:.*]] = dim %[[ARG_0]], %[[DIM_INDEX]] : memref +// CHECK-NEXT: %[[LOWER_1:.*]] = constant 0 : index +// CHECK-NEXT: %[[LOWER_2:.*]] = constant 0 : index +// CHECK-NEXT: %[[STEP_1:.*]] = constant 1 : index +// CHECK-NEXT: %[[STEP_2:.*]] = constant 1 : index +// CHECK-NEXT: scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER]], %[[UPPER]]) step (%[[STEP_1]], %[[STEP_2]]) { +// CHECK-NEXT: %[[VAL_1:.*]] = load +// CHECK-NEXT: %[[VAL_2:.*]] = load +// CHECK-NEXT: %[[PRODUCT:.*]] = mulf +// CHECK-NEXT: store +// CHECK-NEXT: scf.yield +// CHECK-NEXT: } +// CHECK-NEXT: return +// CHECK-NEXT: } + +///////////////////////////////////////////////////////////////////// + +func @affine_parallel_with_reductions(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>) -> (f32, f32) { + %0:2 = affine.parallel (%kx, %ky) = (0, 0) to (2, 2) reduce ("addf", "mulf") -> (f32, f32) { + %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf32> + %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf32> + %3 = mulf %1, %2 : f32 + %4 = addf %1, %2 : f32 + affine.yield %3, %4 : f32, f32 + } + return %0#0, %0#1 : f32, f32 +} +// CHECK-LABEL: func @affine_parallel_with_reductions +// CHECK: %[[LOWER_1:.*]] = constant 0 : index +// CHECK-NEXT: %[[LOWER_2:.*]] = constant 0 : index +// CHECK-NEXT: %[[UPPER_1:.*]] = constant 2 : index +// CHECK-NEXT: %[[UPPER_2:.*]] = constant 2 : index +// CHECK-NEXT: %[[STEP_1:.*]] = constant 1 : index +// CHECK-NEXT: %[[STEP_2:.*]] = constant 1 : index +// CHECK-NEXT: %[[INIT_1:.*]] = constant 0.000000e+00 : f32 +// CHECK-NEXT: %[[INIT_2:.*]] = constant 1.000000e+00 : f32 +// CHECK-NEXT: %[[RES:.*]] = scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) init (%[[INIT_1]], %[[INIT_2]]) -> (f32, f32) { +// CHECK-NEXT: %[[VAL_1:.*]] = load +// CHECK-NEXT: %[[VAL_2:.*]] = load +// 
CHECK-NEXT: %[[PRODUCT:.*]] = mulf +// CHECK-NEXT: %[[SUM:.*]] = addf +// CHECK-NEXT: scf.reduce(%[[PRODUCT]]) : f32 { +// CHECK-NEXT: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32): +// CHECK-NEXT: %[[RES:.*]] = addf +// CHECK-NEXT: scf.reduce.return %[[RES]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: scf.reduce(%[[SUM]]) : f32 { +// CHECK-NEXT: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32): +// CHECK-NEXT: %[[RES:.*]] = mulf +// CHECK-NEXT: scf.reduce.return %[[RES]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: scf.yield +// CHECK-NEXT: } +// CHECK-NEXT: return +// CHECK-NEXT: } From 0955d8df06355610bf539c53afd26bb62c500f44 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 22 Dec 2020 17:39:00 +0100 Subject: [PATCH 098/378] [mlir] Add gpu.memcpy op. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D93197 --- mlir/include/mlir/Dialect/GPU/GPUOps.td | 35 +++++++++++++++++++++++++ mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 18 +++++++++++++ mlir/test/Dialect/GPU/invalid.mlir | 14 ++++++++++ mlir/test/Dialect/GPU/ops.mlir | 11 ++++++++ 4 files changed, 78 insertions(+) diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index 953a2d5c282c9..457477f7f3c1a 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -879,4 +879,39 @@ def GPU_DeallocOp : GPU_Op<"dealloc", [ }]; } +def GPU_MemcpyOp : GPU_Op<"memcpy", [ + GPU_AsyncOpInterface, MemoryEffects<[MemRead, MemWrite]> + ]> { + + let summary = "GPU memcpy operation"; + + let description = [{ + The `gpu.memcpy` operation copies the content of one memref to another. + + The op does not execute before all async dependencies have finished + executing. + + If the `async` keyword is present, the op is executed asynchronously (i.e. + it does not block until the execution has finished on the device). In + that case, it returns a !gpu.async.token. 
+ + Example: + + ```mlir + %token = gpu.memcpy async [%dep] %dst, %src : memref, memref + ``` + }]; + + let arguments = (ins Variadic:$asyncDependencies, + Arg:$dst, + Arg:$src); + let results = (outs Optional:$asyncToken); + + let assemblyFormat = [{ + custom(type($asyncToken), $asyncDependencies) + $dst`,` $src `:` type($dst)`,` type($src) attr-dict + }]; + let verifier = [{ return ::verify(*this); }]; +} + #endif // GPU_OPS diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index e8a90acf8830f..d3fa2ccb6dcd2 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -22,6 +22,7 @@ #include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -842,6 +843,23 @@ static void print(OpAsmPrinter &p, GPUModuleOp op) { /*printBlockTerminators=*/false); } +//===----------------------------------------------------------------------===// +// GPUMemcpyOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(MemcpyOp op) { + auto srcType = op.src().getType(); + auto dstType = op.dst().getType(); + + if (getElementTypeOrSelf(srcType) != getElementTypeOrSelf(dstType)) + return op.emitOpError("arguments have incompatible element type"); + + if (failed(verifyCompatibleShape(srcType, dstType))) + return op.emitOpError("arguments have incompatible shape"); + + return success(); +} + static ParseResult parseAsyncDependencies( OpAsmParser &parser, Type &asyncTokenType, SmallVectorImpl &asyncDependencies) { diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 3dc5be405aac1..1f6058cde3974 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -444,3 +444,17 @@ func @async_wait_without_result() { // expected-error @+1 {{custom op 'gpu.wait' needs to be named when marked 'async'}} gpu.wait async } + +// ----- + +func @memcpy_incompatible_type(%dst : memref, %src : memref) { + // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible element type}} + gpu.memcpy %dst, %src : memref, memref +} + +// ----- + +func @memcpy_incompatible_shape(%dst : memref<7xf32>, %src : memref<9xf32>) { + // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible shape}} + gpu.memcpy %dst, %src : memref<7xf32>, memref<9xf32> +} diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index aed4368c22a71..5cea772ce5991 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -183,4 +183,15 @@ module attributes {gpu.container_module} { gpu.wait // Valid, but a no-op. return } + + func @memcpy(%dst : memref<3x7xf32>, %src : memref<3x7xf32, 1>) { + // CHECK-LABEL: func @memcpy + // CHECK: gpu.memcpy {{.*}}, {{.*}} : memref<3x7xf32>, memref<3x7xf32, 1> + gpu.memcpy %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1> + // CHECK: %[[t0:.*]] = gpu.wait async + %0 = gpu.wait async + // CHECK: {{.*}} = gpu.memcpy async [%[[t0]]] {{.*}}, {{.*}} : memref<3x7xf32>, memref<3x7xf32, 1> + %1 = gpu.memcpy async [%0] %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1> + return + } } From 8eec7294fea87273215592a2dc5bee6afd47d456 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 18 Dec 2020 18:42:58 +0000 Subject: [PATCH 099/378] [SVE] Lower vector BITREVERSE and BSWAP operations. 
These operations are lowered to RBIT and REVB instructions respectively. In the case of fixed-length support using SVE we also lower BITREVERSE operating on NEON sized vectors as this results in fewer instructions. Differential Revision: https://reviews.llvm.org/D93606 --- .../Target/AArch64/AArch64ISelLowering.cpp | 23 +- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 6 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 22 +- .../CodeGen/AArch64/sve-fixed-length-rev.ll | 643 ++++++++++++++++++ .../AArch64/sve-intrinsics-reversal.ll | 35 - llvm/test/CodeGen/AArch64/sve-rev.ll | 97 +++ 7 files changed, 776 insertions(+), 52 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-rev.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e74bc739ddafe..48fbea840bad6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -182,6 +182,8 @@ static bool isMergePassthruOpcode(unsigned Opc) { switch (Opc) { default: return false; + case AArch64ISD::BITREVERSE_MERGE_PASSTHRU: + case AArch64ISD::BSWAP_MERGE_PASSTHRU: case AArch64ISD::DUP_MERGE_PASSTHRU: case AArch64ISD::FNEG_MERGE_PASSTHRU: case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: @@ -1066,6 +1068,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // splat of 0 or undef) once vector selects supported in SVE codegen. See // D68877 for more details. for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { + setOperationAction(ISD::BITREVERSE, VT, Custom); + setOperationAction(ISD::BSWAP, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); setOperationAction(ISD::SINT_TO_FP, VT, Custom); @@ -1183,6 +1187,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, VT, Expand); // These operations are not supported on NEON but SVE can do them. + setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); setOperationAction(ISD::MUL, MVT::v1i64, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::SDIV, MVT::v8i8, Custom); @@ -1217,6 +1222,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Int operations with no NEON support. 
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::BITREVERSE, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); @@ -1330,6 +1336,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::ADD, VT, Custom); setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::ANY_EXTEND, VT, Custom); + setOperationAction(ISD::BITREVERSE, VT, Custom); + setOperationAction(ISD::BSWAP, VT, Custom); setOperationAction(ISD::FADD, VT, Custom); setOperationAction(ISD::FCEIL, VT, Custom); setOperationAction(ISD::FDIV, VT, Custom); @@ -1934,6 +1942,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::LDP) MAKE_CASE(AArch64ISD::STP) MAKE_CASE(AArch64ISD::STNP) + MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::INDEX_VECTOR) MAKE_CASE(AArch64ISD::UABD) @@ -3646,7 +3656,13 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(), Op.getOperand(1), Scalar); } - + case Intrinsic::aarch64_sve_rbit: + return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_revb: + return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_sxtb: return DAG.getNode( AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), @@ -4357,6 +4373,11 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerFixedLengthVectorSelectToSVE(Op, DAG); case ISD::ABS: return LowerABS(Op, DAG); + case ISD::BITREVERSE: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU, + /*OverrideNEON=*/true); + case ISD::BSWAP: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 96da82e487619..36518a5349b4a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -305,6 +305,8 @@ enum NodeType : unsigned { PTEST, PTRUE, + BITREVERSE_MERGE_PASSTHRU, + BSWAP_MERGE_PASSTHRU, DUP_MERGE_PASSTHRU, INDEX_VECTOR, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index f28c55ae22e66..e9a823c6c4136 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -214,6 +214,8 @@ def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>; def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, @@ -571,8 +573,8 @@ let Predicates = [HasSVE] in { defm INSR_ZV : 
sve_int_perm_insrv<"insr", AArch64insr>; defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>; - defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>; - defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>; + defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>; + defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>; defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>; defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index c4b4d95cd46d3..b5077cf263e74 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -6012,26 +6012,20 @@ multiclass sve_int_perm_rev_rbit { def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>; def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>; - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat(NAME # _B)>; + def : SVE_1_Op_Passthru_Pat(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat(NAME # _D)>; } -multiclass sve_int_perm_rev_revb { +multiclass sve_int_perm_rev_revb { def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>; def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>; def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_1_Op_AllActive_Pat(NAME # _H), PTRUE_H>; - def : SVE_1_Op_AllActive_Pat(NAME # _S), PTRUE_S>; - def : SVE_1_Op_AllActive_Pat(NAME # _D), PTRUE_D>; + def : SVE_1_Op_Passthru_Pat(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat(NAME # _D)>; } multiclass sve_int_perm_rev_revh { diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll new file mode 100644 index 0000000000000..0e85c9e28defd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll @@ -0,0 +1,643 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s 
-check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; +; RBIT +; + +define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) #0 { +; CHECK-LABEL: bitreverse_v8i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl8 +; CHECK-NEXT: rbit z0.b, [[PG]]/m, z0.b +; CHECK-NEXT: ret + %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op) + ret <8 x i8> %res +} + +define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) #0 { +; CHECK-LABEL: bitreverse_v16i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl16 +; CHECK-NEXT: rbit z0.b, [[PG]]/m, z0.b +; CHECK-NEXT: ret + %res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op) + ret <16 x i8> %res +} + +define void @bitreverse_v32i8(<32 x i8>* %a) #0 { +; CHECK-LABEL: bitreverse_v32i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl32 +; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b +; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <32 x i8>, <32 x i8>* %a + %res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op) + store <32 x i8> %res, <32 x i8>* %a + ret void +} + +define void @bitreverse_v64i8(<64 x i8>* %a) #0 { +; CHECK-LABEL: bitreverse_v64i8: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64 +; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b +; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret +; +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b +; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b +; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-NEXT: ret + %op = load <64 x i8>, <64 x i8>* %a + %res = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %op) + store <64 x i8> %res, <64 x i8>* %a + ret void +} + +define void @bitreverse_v128i8(<128 x i8>* %a) #0 { +; CHECK-LABEL: bitreverse_v128i8: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128 +; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b +; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <128 x i8>, <128 x i8>* %a + %res = call <128 x i8> @llvm.bitreverse.v128i8(<128 x i8> %op) + store <128 x i8> %res, <128 x i8>* %a + ret void +} + +define void @bitreverse_v256i8(<256 x i8>* %a) #0 { +; CHECK-LABEL: bitreverse_v256i8: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256 +; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b +; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <256 x i8>, <256 x i8>* %a + %res = call <256 x i8> @llvm.bitreverse.v256i8(<256 x i8> %op) + store <256 x i8> %res, <256 x i8>* %a + ret void +} + +define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) #0 { +; CHECK-LABEL: bitreverse_v4i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl4 +; CHECK-NEXT: rbit z0.h, [[PG]]/m, z0.h +; CHECK-NEXT: ret + %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op) + ret <4 x i16> %res +} + +define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) #0 { +; CHECK-LABEL: bitreverse_v8i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl8 +; CHECK-NEXT: rbit z0.h, [[PG]]/m, z0.h +; CHECK-NEXT: ret + %res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op) + ret <8 x i16> %res +} + +define void @bitreverse_v16i16(<16 x i16>* %a) #0 { +; CHECK-LABEL: bitreverse_v16i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x i16>, <16 x i16>* %a + %res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op) + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +define void @bitreverse_v32i16(<32 x i16>* %a) #0 { +; CHECK-LABEL: bitreverse_v32i16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x i16>, <32 x i16>* %a + %res = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %op) + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +define void @bitreverse_v64i16(<64 x i16>* %a) #0 { +; CHECK-LABEL: bitreverse_v64i16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x i16>, <64 x i16>* %a + %res = call <64 x i16> @llvm.bitreverse.v64i16(<64 x i16> %op) + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +define void @bitreverse_v128i16(<128 x i16>* %a) #0 { +; CHECK-LABEL: bitreverse_v128i16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x i16>, <128 x i16>* %a + %res = call <128 x i16> @llvm.bitreverse.v128i16(<128 x i16> %op) + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) #0 { +; CHECK-LABEL: bitreverse_v2i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl2 +; CHECK-NEXT: rbit z0.s, [[PG]]/m, z0.s +; CHECK-NEXT: ret + %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op) + ret <2 x i32> %res +} + +define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) #0 { +; CHECK-LABEL: bitreverse_v4i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl4 +; CHECK-NEXT: rbit z0.s, [[PG]]/m, z0.s +; CHECK-NEXT: ret + %res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op) + ret <4 x i32> %res +} + +define void @bitreverse_v8i32(<8 x i32>* %a) #0 { +; CHECK-LABEL: bitreverse_v8i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x i32>, <8 x i32>* %a + %res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op) + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +define void @bitreverse_v16i32(<16 x i32>* %a) #0 { +; CHECK-LABEL: bitreverse_v16i32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x i32>, <16 x i32>* %a + %res = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %op) + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +define void @bitreverse_v32i32(<32 x i32>* %a) #0 { +; CHECK-LABEL: bitreverse_v32i32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x i32>, <32 x i32>* %a + %res = call <32 x i32> @llvm.bitreverse.v32i32(<32 x i32> %op) + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +define void @bitreverse_v64i32(<64 x i32>* %a) #0 { +; CHECK-LABEL: bitreverse_v64i32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x i32>, <64 x i32>* %a + %res = call <64 x i32> @llvm.bitreverse.v64i32(<64 x i32> %op) + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) #0 { +; CHECK-LABEL: bitreverse_v1i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl1 +; CHECK-NEXT: rbit z0.d, [[PG]]/m, z0.d +; CHECK-NEXT: ret + %res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op) + ret <1 x i64> %res +} + +define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) #0 { +; CHECK-LABEL: bitreverse_v2i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl2 +; CHECK-NEXT: rbit z0.d, [[PG]]/m, z0.d +; CHECK-NEXT: ret + %res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op) + ret <2 x i64> %res +} + +define void @bitreverse_v4i64(<4 x i64>* %a) #0 { +; CHECK-LABEL: bitreverse_v4i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x i64>, <4 x i64>* %a + %res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op) + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +define void @bitreverse_v8i64(<8 x i64>* %a) #0 { +; CHECK-LABEL: bitreverse_v8i64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x i64>, <8 x i64>* %a + %res = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %op) + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +define void @bitreverse_v16i64(<16 x i64>* %a) #0 { +; CHECK-LABEL: bitreverse_v16i64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x i64>, <16 x i64>* %a + %res = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> %op) + store <16 x i64> %res, <16 x i64>* %a + ret void +} + +define void @bitreverse_v32i64(<32 x i64>* %a) #0 { +; CHECK-LABEL: bitreverse_v32i64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x i64>, <32 x i64>* %a + %res = call <32 x i64> @llvm.bitreverse.v32i64(<32 x i64> %op) + store <32 x i64> %res, <32 x i64>* %a + ret void +} + +; +; REVB +; + +; Don't use SVE for 64-bit vectors. +define <4 x i16> @bswap_v4i16(<4 x i16> %op) #0 { +; CHECK-LABEL: bswap_v4i16: +; CHECK: rev16 v0.8b, v0.8b +; CHECK-NEXT: ret + %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op) + ret <4 x i16> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 { +; CHECK-LABEL: bswap_v8i16: +; CHECK: rev16 v0.16b, v0.16b +; CHECK-NEXT: ret + %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op) + ret <8 x i16> %res +} + +define void @bswap_v16i16(<16 x i16>* %a) #0 { +; CHECK-LABEL: bswap_v16i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <16 x i16>, <16 x i16>* %a + %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op) + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +define void @bswap_v32i16(<32 x i16>* %a) #0 { +; CHECK-LABEL: bswap_v32i16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h +; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <32 x i16>, <32 x i16>* %a + %res = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %op) + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +define void @bswap_v64i16(<64 x i16>* %a) #0 { +; CHECK-LABEL: bswap_v64i16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <64 x i16>, <64 x i16>* %a + %res = call <64 x i16> @llvm.bswap.v64i16(<64 x i16> %op) + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +define void @bswap_v128i16(<128 x i16>* %a) #0 { +; CHECK-LABEL: bswap_v128i16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <128 x i16>, <128 x i16>* %a + %res = call <128 x i16> @llvm.bswap.v128i16(<128 x i16> %op) + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 { +; CHECK-LABEL: bswap_v2i32: +; CHECK: rev32 v0.8b, v0.8b +; CHECK-NEXT: ret + %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op) + ret <2 x i32> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 { +; CHECK-LABEL: bswap_v4i32: +; CHECK: rev32 v0.16b, v0.16b +; CHECK-NEXT: ret + %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op) + ret <4 x i32> %res +} + +define void @bswap_v8i32(<8 x i32>* %a) #0 { +; CHECK-LABEL: bswap_v8i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <8 x i32>, <8 x i32>* %a + %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op) + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +define void @bswap_v16i32(<16 x i32>* %a) #0 { +; CHECK-LABEL: bswap_v16i32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s +; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <16 x i32>, <16 x i32>* %a + %res = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %op) + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +define void @bswap_v32i32(<32 x i32>* %a) #0 { +; CHECK-LABEL: bswap_v32i32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <32 x i32>, <32 x i32>* %a + %res = call <32 x i32> @llvm.bswap.v32i32(<32 x i32> %op) + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +define void @bswap_v64i32(<64 x i32>* %a) #0 { +; CHECK-LABEL: bswap_v64i32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <64 x i32>, <64 x i32>* %a + %res = call <64 x i32> @llvm.bswap.v64i32(<64 x i32> %op) + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 { +; CHECK-LABEL: bswap_v1i64: +; CHECK: rev64 v0.8b, v0.8b +; CHECK-NEXT: ret + %res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op) + ret <1 x i64> %res +} + +; Don't use SVE for 128-bit vectors. +define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 { +; CHECK-LABEL: bswap_v2i64: +; CHECK: rev64 v0.16b, v0.16b +; CHECK-NEXT: ret + %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op) + ret <2 x i64> %res +} + +define void @bswap_v4i64(<4 x i64>* %a) #0 { +; CHECK-LABEL: bswap_v4i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op = load <4 x i64>, <4 x i64>* %a + %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op) + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +define void @bswap_v8i64(<8 x i64>* %a) #0 { +; CHECK-LABEL: bswap_v8i64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d +; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op = load <8 x i64>, <8 x i64>* %a + %res = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %op) + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +define void @bswap_v16i64(<16 x i64>* %a) #0 { +; CHECK-LABEL: bswap_v16i64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op = load <16 x i64>, <16 x i64>* %a + %res = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> %op) + store <16 x i64> %res, <16 x i64>* %a + ret void +} + +define void @bswap_v32i64(<32 x i64>* %a) #0 { +; CHECK-LABEL: bswap_v32i64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op = load <32 x i64>, <32 x i64>* %a + %res = call <32 x i64> @llvm.bswap.v32i64(<32 x i64> %op) + store <32 x i64> %res, <32 x i64>* %a + ret void +} + +attributes #0 = { "target-features"="+sve" } + +declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) +declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) +declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>) +declare <64 x i8> @llvm.bitreverse.v64i8(<64 x i8>) +declare <128 x i8> @llvm.bitreverse.v128i8(<128 x i8>) +declare <256 x i8> @llvm.bitreverse.v256i8(<256 x i8>) +declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>) +declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) +declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>) +declare <32 x i16> @llvm.bitreverse.v32i16(<32 x i16>) +declare <64 x i16> @llvm.bitreverse.v64i16(<64 x i16>) +declare <128 x i16> @llvm.bitreverse.v128i16(<128 x i16>) +declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) +declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) +declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>) +declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>) +declare <32 x i32> @llvm.bitreverse.v32i32(<32 x i32>) +declare <64 x i32> @llvm.bitreverse.v64i32(<64 x i32>) +declare <1 x i64> @llvm.bitreverse.v1i64(<1 x i64>) +declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) +declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) +declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>) +declare <16 x i64> @llvm.bitreverse.v16i64(<16 x i64>) +declare <32 x i64> @llvm.bitreverse.v32i64(<32 x i64>) + +declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>) +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>) +declare <32 x i16> @llvm.bswap.v32i16(<32 x i16>) +declare <64 x i16> @llvm.bswap.v64i16(<64 x i16>) +declare <128 x i16> @llvm.bswap.v128i16(<128 x i16>) +declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) +declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>) +declare <32 x i32> 
@llvm.bswap.v32i32(<32 x i32>) +declare <64 x i32> @llvm.bswap.v64i32(<64 x i32>) +declare <1 x i64> @llvm.bswap.v1i64(<1 x i64>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) +declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) +declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>) +declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>) +declare <32 x i64> @llvm.bswap.v32i64(<32 x i64>) + diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll index ae3fa5c419ba0..33034f7bac01e 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll @@ -82,37 +82,6 @@ define @revb_i64( %a, %pg ret %out } -; -; REVB (bswap) -; - -define @revb_i16_bswap( %a) { -; CHECK-LABEL: revb_i16_bswap: -; CHECK: ptrue [[PG:p[0-9]+]].h -; CHECK-NEXT: revb z0.h, [[PG]]/m, z0.h -; CHECK-NEXT: ret - %res = call @llvm.bswap.nxv8i16( %a) - ret %res -} - -define @revb_i32_bswap( %a) { -; CHECK-LABEL: revb_i32_bswap: -; CHECK: ptrue [[PG:p[0-9]+]].s -; CHECK-NEXT: revb z0.s, [[PG]]/m, z0.s -; CHECK-NEXT: ret - %res = call @llvm.bswap.nxv4i32( %a) - ret %res -} - -define @revb_i64_bswap( %a) { -; CHECK-LABEL: revb_i64_bswap: -; CHECK: ptrue [[PG:p[0-9]+]].d -; CHECK-NEXT: revb z0.d, [[PG]]/m, z0.d -; CHECK-NEXT: ret - %res = call @llvm.bswap.nxv2i64( %a) - ret %res -} - ; ; REVH ; @@ -160,10 +129,6 @@ declare @llvm.aarch64.sve.revb.nxv8i16(, @llvm.aarch64.sve.revb.nxv4i32(, , ) declare @llvm.aarch64.sve.revb.nxv2i64(, , ) -declare @llvm.bswap.nxv8i16() -declare @llvm.bswap.nxv4i32() -declare @llvm.bswap.nxv2i64() - declare @llvm.aarch64.sve.revh.nxv4i32(, , ) declare @llvm.aarch64.sve.revh.nxv2i64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-rev.ll b/llvm/test/CodeGen/AArch64/sve-rev.ll new file mode 100644 index 0000000000000..c1f9eda489885 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-rev.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning + +target triple = "aarch64-unknown-linux-gnu" + +; +; RBIT +; + +define @bitreverse_i8( %a) #0 { +; CHECK-LABEL: bitreverse_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rbit z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %res = call @llvm.bitreverse.nxv16i8( %a) + ret %res +} + +define @bitreverse_i16( %a) #0 { +; CHECK-LABEL: bitreverse_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: rbit z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.bitreverse.nxv8i16( %a) + ret %res +} + +define @bitreverse_i32( %a) #0 { +; CHECK-LABEL: bitreverse_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: rbit z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.bitreverse.nxv4i32( %a) + ret %res +} + +define @bitreverse_i64( %a) #0 { +; CHECK-LABEL: bitreverse_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: rbit z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.bitreverse.nxv2i64( %a) + ret %res +} + +; +; REVB +; + +define @byteswap_i16( %a) #0 { +; CHECK-LABEL: byteswap_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: revb z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.bswap.nxv8i16( %a) + ret %res +} + +define @byteswap_i32( %a) #0 { +; CHECK-LABEL: byteswap_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: revb z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.bswap.nxv4i32( %a) + ret %res +} + +define @byteswap_i64( %a) #0 { +; CHECK-LABEL: byteswap_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: revb z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.bswap.nxv2i64( %a) + ret %res +} + +attributes #0 = { "target-features"="+sve" } + +declare @llvm.bitreverse.nxv16i8() +declare @llvm.bitreverse.nxv8i16() +declare @llvm.bitreverse.nxv4i32() +declare @llvm.bitreverse.nxv2i64() + +declare @llvm.bswap.nxv8i16() +declare @llvm.bswap.nxv4i32() +declare @llvm.bswap.nxv2i64() From 9a7895dc20852b662a66976d06871ec2a0b968c8 Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Tue, 22 Dec 2020 21:57:26 +0530 Subject: [PATCH 100/378] [Flang][openmp][5.0] Add task_reduction clause. See OMP-5.0 2.19.5.5 task_reduction Clause. To add a positive test case we need `taskgroup` directive which is not added hence skipping the test. This is a dependency for `taskgroup` construct. 
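For reference, the clause takes the same form as REDUCTION, i.e.
task_reduction(reduction-identifier : variable-name-list). A minimal usage
sketch of where the clause is expected to appear once TASKGROUP is supported
(hypothetical example, not part of this patch's tests; flang does not yet
parse the TASKGROUP directive):

  integer :: x
  x = 0
  !$omp taskgroup task_reduction(+:x)
  ! ... child tasks contributing to x ...
  !$omp end taskgroup

Until then, the negative tests below only verify that the clause is parsed
and rejected on directives where it is not allowed.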
Reviewed By: clementval Differential Revision: https://reviews.llvm.org/D93105 --- flang/include/flang/Parser/parse-tree.h | 2 +- flang/lib/Parser/openmp-parsers.cpp | 5 ++++- flang/lib/Parser/unparse.cpp | 2 +- flang/lib/Semantics/check-omp-structure.cpp | 1 + flang/lib/Semantics/check-omp-structure.h | 1 + flang/test/Semantics/omp-clause-validity01.f90 | 6 ++++-- llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 7 files changed, 13 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 7e258b668576e..119a92bee2116 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3415,7 +3415,7 @@ struct OmpReductionOperator { // variable-name-list) struct OmpReductionClause { TUPLE_CLASS_BOILERPLATE(OmpReductionClause); - std::tuple> t; + std::tuple t; }; // OMP 5.0 2.11.4 allocate-clause -> ALLOCATE ([allocator:] variable-name-list) diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 1386b2b16a788..3a0d28cd9c12f 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -102,7 +102,7 @@ TYPE_PARSER(construct(Parser{}) || construct(Parser{})) TYPE_PARSER(construct( - Parser{} / ":", nonemptyList(designator))) + Parser{} / ":", Parser{})) // OMP 5.0 2.11.4 ALLOCATE ([allocator:] variable-name-list) TYPE_PARSER(construct( @@ -220,6 +220,9 @@ TYPE_PARSER( parenthesized(Parser{}))) || "REDUCTION" >> construct(parenthesized(Parser{})) || + "TASK_REDUCTION" >> + construct(construct( + parenthesized(Parser{}))) || "RELAXED" >> construct(construct()) || "RELEASE" >> construct(construct()) || "SAFELEN" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index fdb694f3d26f5..ba54a0a84fa73 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2016,7 +2016,7 @@ class UnparseVisitor { Word("REDUCTION("); Walk(std::get(x.t)); Put(":"); - Walk(std::get>(x.t), ","); + Walk(std::get(x.t)); Put(")"); } void Unparse(const OmpAllocateClause &x) { diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index e2c8333ce7ee4..a144c7a2b57b9 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -419,6 +419,7 @@ CHECK_SIMPLE_CLAUSE(Mergeable, OMPC_mergeable) CHECK_SIMPLE_CLAUSE(Nogroup, OMPC_nogroup) CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) CHECK_SIMPLE_CLAUSE(Nowait, OMPC_nowait) +CHECK_SIMPLE_CLAUSE(TaskReduction, OMPC_task_reduction) CHECK_SIMPLE_CLAUSE(To, OMPC_to) CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform) CHECK_SIMPLE_CLAUSE(Untied, OMPC_untied) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index a966eaf8c4a7d..ccd0e08a8c08a 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -155,6 +155,7 @@ class OmpStructureChecker void Enter(const parser::OmpClause::Safelen &); void Enter(const parser::OmpClause::Shared &); void Enter(const parser::OmpClause::Simdlen &); + void Enter(const parser::OmpClause::TaskReduction &); void Enter(const parser::OmpClause::ThreadLimit &); void Enter(const parser::OmpClause::To &); void Enter(const parser::OmpClause::Link &); diff --git a/flang/test/Semantics/omp-clause-validity01.f90 b/flang/test/Semantics/omp-clause-validity01.f90 index 3f53451378663..1d689ea916996 100644 --- 
a/flang/test/Semantics/omp-clause-validity01.f90 +++ b/flang/test/Semantics/omp-clause-validity01.f90 @@ -349,7 +349,8 @@ ! collapse-clause a = 0.0 - !$omp simd private(b) reduction(+:a) + !ERROR: TASK_REDUCTION clause is not allowed on the SIMD directive + !$omp simd private(b) reduction(+:a) task_reduction(+:a) do i = 1, N a = a + b + 3.14 enddo @@ -449,7 +450,8 @@ enddo !ERROR: At most one NUM_TASKS clause can appear on the TASKLOOP directive - !$omp taskloop num_tasks(3) num_tasks(2) + !ERROR: TASK_REDUCTION clause is not allowed on the TASKLOOP directive + !$omp taskloop num_tasks(3) num_tasks(2) task_reduction(*:a) do i = 1,N a = 3.14 enddo diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index fa67a64fa9970..9fd14cb03a475 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -231,6 +231,7 @@ def OMPC_IsDevicePtr : Clause<"is_device_ptr"> { } def OMPC_TaskReduction : Clause<"task_reduction"> { let clangClass = "OMPTaskReductionClause"; + let flangClassValue = "OmpReductionClause"; } def OMPC_InReduction : Clause<"in_reduction"> { let clangClass = "OMPInReductionClause"; From 8a58f21f5b6c228137a9b87906fe5b720c4d1dfb Mon Sep 17 00:00:00 2001 From: Kamau Bridgeman Date: Tue, 22 Dec 2020 12:04:57 -0500 Subject: [PATCH 101/378] [PowerPC][Power10] Exploit store rightmost vector element instructions Using the store rightmost vector element instructions to do vector element extraction and store. The rightmost vector element on little endian is the zeroth vector element, with these patterns that element can be extracted and stored in one instruction for all vector types. Differential Revision: https://reviews.llvm.org/D89195 --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 25 +- .../CodeGen/PowerPC/builtins-ppc-p10vsx.ll | 261 ++++++++++-------- .../PowerPC/store-rightmost-vector-elt.ll | 109 ++++++++ 3 files changed, 277 insertions(+), 118 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 54e9adae40d7e..e7fa2affb7309 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2554,16 +2554,21 @@ let Predicates = [IsISA3_1, HasVSX] in { (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>; } -let AddedComplexity = 400, Predicates = [IsISA3_1] in { - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src), - (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src), - (STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; - def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src), - (STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; - def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src), - (STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; -} +let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in { + // Store element 0 of a VSX register to memory + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst), + (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst), + (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst), + (STXVRWX (COPY_TO_REGCLASS v4i32:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (f32 
(extractelt v4f32:$src, 0)), xoaddr:$dst), + (STXVRWX (COPY_TO_REGCLASS v4f32:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst), + (STXVRDX (COPY_TO_REGCLASS v2i64:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst), + (STXVRDX (COPY_TO_REGCLASS v2f64:$src, VSRC), xoaddr:$dst)>; + } class xxevalPattern imm> : Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll index 17617e90a01f4..9e8f8d073a1ae 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-O0 +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-O0 ; These test cases aims to test the builtins for the Power10 VSX vector ; instructions introduced in ISA 3.1. @@ -22,14 +22,6 @@ define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) { ; CHECK-NEXT: srwi r3, r3, 31 ; CHECK-NEXT: extsw r3, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: xvtlsbb cr0, v2 -; CHECK-O0-NEXT: mfocrf r3, 128 -; CHECK-O0-NEXT: srwi r3, r3, 31 -; CHECK-O0-NEXT: extsw r3, r3 -; CHECK-O0-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1) ret i32 %0 @@ -43,24 +35,22 @@ define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) { ; CHECK-NEXT: rlwinm r3, r3, 3, 31, 31 ; CHECK-NEXT: extsw r3, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: xvtlsbb cr0, v2 -; CHECK-O0-NEXT: mfocrf r3, 128 -; CHECK-O0-NEXT: rlwinm r3, r3, 3, 31, 31 -; CHECK-O0-NEXT: extsw r3, r3 -; CHECK-O0-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0) ret i32 %0 } define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_sc: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stxvrbx v2, r6, r5 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_sc: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrbx v2, r6, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_sc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9 +; CHECK-BE-NEXT: stxsibx v2, r6, r5 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_sc: ; CHECK-O0: # %bb.0: # %entry @@ -79,10 +69,16 @@ entry: } define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_uc: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stxvrbx v2, r6, r5 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_uc: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrbx v2, r6, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: 
vec_xst_trunc_uc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9 +; CHECK-BE-NEXT: stxsibx v2, r6, r5 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_uc: ; CHECK-O0: # %bb.0: # %entry @@ -101,11 +97,18 @@ entry: } define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_ss: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 1 -; CHECK-NEXT: stxvrhx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_ss: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 1 +; CHECK-LE-NEXT: stxvrhx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_ss: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-BE-NEXT: sldi r3, r5, 1 +; CHECK-BE-NEXT: stxsihx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_ss: ; CHECK-O0: # %bb.0: # %entry @@ -125,11 +128,18 @@ entry: } define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_us: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 1 -; CHECK-NEXT: stxvrhx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_us: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 1 +; CHECK-LE-NEXT: stxvrhx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_us: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-BE-NEXT: sldi r3, r5, 1 +; CHECK-BE-NEXT: stxsihx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_us: ; CHECK-O0: # %bb.0: # %entry @@ -149,11 +159,18 @@ entry: } define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_si: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 2 -; CHECK-NEXT: stxvrwx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_si: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 2 +; CHECK-LE-NEXT: stxvrwx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_si: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: sldi r3, r5, 2 +; CHECK-BE-NEXT: stfiwx f0, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_si: ; CHECK-O0: # %bb.0: # %entry @@ -173,11 +190,18 @@ entry: } define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_ui: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 2 -; CHECK-NEXT: stxvrwx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_ui: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 2 +; CHECK-LE-NEXT: stxvrwx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_ui: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: sldi r3, r5, 2 +; CHECK-BE-NEXT: stfiwx f0, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_ui: ; CHECK-O0: # %bb.0: # %entry @@ -197,11 +221,17 @@ entry: } define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_sll: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 3 -; CHECK-NEXT: stxvrdx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_sll: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 3 +; CHECK-LE-NEXT: stxvrdx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_sll: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r5, 3 +; CHECK-BE-NEXT: 
stxsdx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_sll: ; CHECK-O0: # %bb.0: # %entry @@ -219,11 +249,17 @@ entry: } define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_ull: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 3 -; CHECK-NEXT: stxvrdx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_ull: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 3 +; CHECK-LE-NEXT: stxvrdx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_ull: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r5, 3 +; CHECK-BE-NEXT: stxsdx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_ull: ; CHECK-O0: # %bb.0: # %entry @@ -245,11 +281,6 @@ define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxvrbx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: lxvrbx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset %0 = load i8, i8* %add.ptr, align 1 @@ -264,12 +295,6 @@ define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture rea ; CHECK-NEXT: sldi r3, r3, 1 ; CHECK-NEXT: lxvrhx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext_short: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: sldi r3, r3, 1 -; CHECK-O0-NEXT: lxvrhx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset %0 = load i16, i16* %add.ptr, align 2 @@ -284,12 +309,6 @@ define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture read ; CHECK-NEXT: sldi r3, r3, 2 ; CHECK-NEXT: lxvrwx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext_word: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: sldi r3, r3, 2 -; CHECK-O0-NEXT: lxvrwx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset %0 = load i32, i32* %add.ptr, align 4 @@ -304,12 +323,6 @@ define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readon ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lxvrdx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext_dw: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: sldi r3, r3, 3 -; CHECK-O0-NEXT: lxvrdx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset %0 = load i64, i64* %add.ptr, align 8 @@ -319,13 +332,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) { -; CHECK-LABEL: vec_xl_sext_b: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lbzx r3, r4, r3 -; CHECK-NEXT: extsb r3, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xl_sext_b: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lbzx r3, r4, r3 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_b: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lbzx r3, r4, r3 +; CHECK-BE-NEXT: extsb r3, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_b: ; CHECK-O0: # %bb.0: # %entry @@ -343,13 +364,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) { -; CHECK-LABEL: vec_xl_sext_h: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, 
r3, 1 -; CHECK-NEXT: lhax r3, r4, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xl_sext_h: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r3, 1 +; CHECK-LE-NEXT: lhax r3, r4, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_h: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r3, 1 +; CHECK-BE-NEXT: lhax r3, r4, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_h: ; CHECK-O0: # %bb.0: # %entry @@ -367,13 +396,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) { -; CHECK-LABEL: vec_xl_sext_w: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r3, 2 -; CHECK-NEXT: lwax r3, r4, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xl_sext_w: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: lwax r3, r4, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_w: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r3, 2 +; CHECK-BE-NEXT: lwax r3, r4, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_w: ; CHECK-O0: # %bb.0: # %entry @@ -391,13 +428,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) { -; CHECK-LABEL: vec_xl_sext_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: ldx r3, r4, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xl_sext_d: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r3, 3 +; CHECK-LE-NEXT: ldx r3, r4, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_d: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r3, 3 +; CHECK-BE-NEXT: ldx r3, r4, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_d: ; CHECK-O0: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll new file mode 100644 index 0000000000000..5fbcafecfb3d2 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-LE + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @test1(<4 x i32> %A, i32* %a) { +; CHECK-LE-LABEL: test1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrwx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %A, i32 0 + store i32 %vecext, i32* %a, align 4 + ret void +} + +define void @test2(<4 x float> %A, float* %a) { +; CHECK-LE-LABEL: test2: +; CHECK-LE: # %bb.0: # 
%entry +; CHECK-LE-NEXT: stxvrwx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %A, i32 0 + store float %vecext, float* %a, align 4 + ret void +} + +define void @test3(<2 x double> %A, double* %a) { +; CHECK-LE-LABEL: test3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrdx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: stxsd v2, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <2 x double> %A, i32 0 + store double %vecext, double* %a, align 8 + ret void +} + +define void @test4(<2 x i64> %A, i64* %a) { +; CHECK-LE-LABEL: test4: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrdx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: stxsd v2, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <2 x i64> %A, i32 0 + store i64 %vecext, i64* %a, align 8 + ret void +} + +define void @test5(<8 x i16> %A, i16* %a) { +; CHECK-LE-LABEL: test5: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrhx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-BE-NEXT: stxsihx v2, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %A, i32 0 + store i16 %vecext, i16* %a, align 2 + ret void +} + +define void @test6(<16 x i8> %A, i8* %a) { +; CHECK-LE-LABEL: test6: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrbx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test6: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9 +; CHECK-BE-NEXT: stxsibx v2, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %A, i32 0 + store i8 %vecext, i8* %a, align 1 + ret void +} + From 5c1c8443eb7366e6e5086426b5d8dc7d24afc13b Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 21 Dec 2020 13:41:57 -0800 Subject: [PATCH 102/378] [lldb] Abstract scoped timer logic behind LLDB_SCOPED_TIMER (NFC) This patch introduces a LLDB_SCOPED_TIMER macro to hide the needlessly repetitive creation of scoped timers in LLDB. It's similar to the LLDB_LOG(F) macro. 
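As an illustration (excerpted from the Module.cpp hunk below), the repeated
two-line pattern

  static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
  Timer scoped_timer(func_cat, "Module::GetObjectFile () module = %s",
                     GetFileSpec().GetFilename().AsCString(""));

collapses into a single invocation

  LLDB_SCOPED_TIMERF("Module::GetObjectFile () module = %s",
                     GetFileSpec().GetFilename().AsCString(""));

while LLDB_SCOPED_TIMER() covers the common case that only needs
LLVM_PRETTY_FUNCTION as the timer description.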
Differential revision: https://reviews.llvm.org/D93663 --- lldb/include/lldb/Utility/Timer.h | 7 +++ lldb/source/API/SystemInitializerFull.cpp | 3 -- lldb/source/Commands/CommandObjectTarget.cpp | 3 +- lldb/source/Core/Disassembler.cpp | 4 +- lldb/source/Core/Mangled.cpp | 7 +-- lldb/source/Core/Module.cpp | 43 ++++++------------- .../SystemInitializerCommon.cpp | 6 +-- .../source/Interpreter/CommandInterpreter.cpp | 10 ++--- .../CPlusPlus/CPPLanguageRuntime.cpp | 4 +- .../AppleObjCRuntime/AppleObjCRuntimeV2.cpp | 5 +-- .../BSD-Archive/ObjectContainerBSDArchive.cpp | 4 +- .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 4 +- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 3 +- .../Lua/ScriptInterpreterLua.cpp | 3 +- .../Python/ScriptInterpreterPython.cpp | 9 ++-- .../SymbolFile/DWARF/DWARFDebugAranges.cpp | 3 +- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 8 +--- .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 3 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 19 +++----- .../DWARF/SymbolFileDWARFDebugMap.cpp | 12 ++---- .../SymbolVendor/ELF/SymbolVendorELF.cpp | 3 +- .../SymbolVendor/wasm/SymbolVendorWasm.cpp | 3 +- lldb/source/Symbol/CompileUnit.cpp | 3 +- lldb/source/Symbol/DWARFCallFrameInfo.cpp | 3 +- lldb/source/Symbol/LocateSymbolFile.cpp | 9 ++-- lldb/source/Symbol/ObjectFile.cpp | 8 +--- lldb/source/Symbol/Symtab.cpp | 26 ++++------- lldb/source/Target/Target.cpp | 4 +- lldb/source/Target/TargetList.cpp | 7 ++- .../tools/lldb-test/SystemInitializerTest.cpp | 3 -- 30 files changed, 73 insertions(+), 156 deletions(-) diff --git a/lldb/include/lldb/Utility/Timer.h b/lldb/include/lldb/Utility/Timer.h index f97315b2db0fc..91f9c57c03c15 100644 --- a/lldb/include/lldb/Utility/Timer.h +++ b/lldb/include/lldb/Utility/Timer.h @@ -73,4 +73,11 @@ class Timer { } // namespace lldb_private +#define LLDB_SCOPED_TIMER() \ + static ::lldb_private::Timer::Category _cat(LLVM_PRETTY_FUNCTION); \ + ::lldb_private::Timer _scoped_timer(_cat, LLVM_PRETTY_FUNCTION) +#define LLDB_SCOPED_TIMERF(...) 
\ + static ::lldb_private::Timer::Category _cat(LLVM_PRETTY_FUNCTION); \ + ::lldb_private::Timer _scoped_timer(_cat, __VA_ARGS__) + #endif // LLDB_UTILITY_TIMER_H diff --git a/lldb/source/API/SystemInitializerFull.cpp b/lldb/source/API/SystemInitializerFull.cpp index a6421d8f10d0c..0530f94580b3e 100644 --- a/lldb/source/API/SystemInitializerFull.cpp +++ b/lldb/source/API/SystemInitializerFull.cpp @@ -69,9 +69,6 @@ llvm::Error SystemInitializerFull::Initialize() { } void SystemInitializerFull::Terminate() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); - Debugger::SettingsTerminate(); // Terminate plug-ins in core LLDB diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index c033493d4196c..4bce4e7e0734a 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -299,8 +299,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { } const char *file_path = command.GetArgumentAtIndex(0); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "(lldb) target create '%s'", file_path); + LLDB_SCOPED_TIMERF("(lldb) target create '%s'", file_path); FileSpec file_spec; if (file_path) { diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index 1015eafd252e8..3a975d9296f47 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -58,9 +58,7 @@ using namespace lldb_private; DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "Disassembler::FindPlugin (arch = %s, plugin_name = %s)", + LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)", arch.GetArchitectureName(), plugin_name); DisassemblerCreateInstance create_callback = nullptr; diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp index 143ec8770bf47..eaad0f3ebf45f 100644 --- a/lldb/source/Core/Mangled.cpp +++ b/lldb/source/Core/Mangled.cpp @@ -228,9 +228,7 @@ static char *GetItaniumDemangledStr(const char *M) { bool Mangled::DemangleWithRichManglingInfo( RichManglingContext &context, SkipMangledNameFn *skip_mangled_name) { // We need to generate and cache the demangled name. - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "Mangled::DemangleWithRichNameIndexInfo (m_mangled = %s)", + LLDB_SCOPED_TIMERF("Mangled::DemangleWithRichNameIndexInfo (m_mangled = %s)", m_mangled.GetCString()); // Others are not meant to arrive here. ObjC names or C's main() for example @@ -299,8 +297,7 @@ ConstString Mangled::GetDemangledName() const { // already decoded our mangled name. if (m_mangled && m_demangled.IsNull()) { // We need to generate and cache the demangled name. 
- static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "Mangled::GetDemangledName (m_mangled = %s)", + LLDB_SCOPED_TIMERF("Mangled::GetDemangledName (m_mangled = %s)", m_mangled.GetCString()); // Don't bother running anything that isn't mangled diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index b76659ee3e074..1f9987c216588 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -419,8 +419,7 @@ void Module::DumpSymbolContext(Stream *s) { size_t Module::GetNumCompileUnits() { std::lock_guard guard(m_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "Module::GetNumCompileUnits (module = %p)", + LLDB_SCOPED_TIMERF("Module::GetNumCompileUnits (module = %p)", static_cast(this)); if (SymbolFile *symbols = GetSymbolFile()) return symbols->GetNumCompileUnits(); @@ -441,9 +440,7 @@ CompUnitSP Module::GetCompileUnitAtIndex(size_t index) { bool Module::ResolveFileAddress(lldb::addr_t vm_addr, Address &so_addr) { std::lock_guard guard(m_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "Module::ResolveFileAddress (vm_addr = 0x%" PRIx64 ")", + LLDB_SCOPED_TIMERF("Module::ResolveFileAddress (vm_addr = 0x%" PRIx64 ")", vm_addr); SectionList *section_list = GetSectionList(); if (section_list) @@ -594,9 +591,7 @@ uint32_t Module::ResolveSymbolContextsForFileSpec( const FileSpec &file_spec, uint32_t line, bool check_inlines, lldb::SymbolContextItem resolve_scope, SymbolContextList &sc_list) { std::lock_guard guard(m_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "Module::ResolveSymbolContextForFilePath (%s:%u, " + LLDB_SCOPED_TIMERF("Module::ResolveSymbolContextForFilePath (%s:%u, " "check_inlines = %s, resolve_scope = 0x%8.8x)", file_spec.GetPath().c_str(), line, check_inlines ? 
"yes" : "no", resolve_scope); @@ -940,8 +935,7 @@ void Module::FindTypes_Impl( size_t max_matches, llvm::DenseSet &searched_symbol_files, TypeMap &types) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); if (SymbolFile *symbols = GetSymbolFile()) symbols->FindTypes(name, parent_decl_ctx, max_matches, searched_symbol_files, types); @@ -1028,8 +1022,7 @@ void Module::FindTypes( llvm::ArrayRef pattern, LanguageSet languages, llvm::DenseSet &searched_symbol_files, TypeMap &types) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); if (SymbolFile *symbols = GetSymbolFile()) symbols->FindTypes(pattern, languages, searched_symbol_files, types); } @@ -1040,8 +1033,7 @@ SymbolFile *Module::GetSymbolFile(bool can_create, Stream *feedback_strm) { if (!m_did_load_symfile.load() && can_create) { ObjectFile *obj_file = GetObjectFile(); if (obj_file != nullptr) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); m_symfile_up.reset( SymbolVendor::FindPlugin(shared_from_this(), feedback_strm)); m_did_load_symfile = true; @@ -1244,8 +1236,7 @@ ObjectFile *Module::GetObjectFile() { if (!m_did_load_objfile.load()) { std::lock_guard guard(m_mutex); if (!m_did_load_objfile.load()) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "Module::GetObjectFile () module = %s", + LLDB_SCOPED_TIMERF("Module::GetObjectFile () module = %s", GetFileSpec().GetFilename().AsCString("")); lldb::offset_t data_offset = 0; lldb::offset_t file_size = 0; @@ -1312,9 +1303,8 @@ SectionList *Module::GetUnifiedSectionList() { const Symbol *Module::FindFirstSymbolWithNameAndType(ConstString name, SymbolType symbol_type) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, "Module::FindFirstSymbolWithNameAndType (name = %s, type = %i)", + LLDB_SCOPED_TIMERF( + "Module::FindFirstSymbolWithNameAndType (name = %s, type = %i)", name.AsCString(), symbol_type); if (Symtab *symtab = GetSymtab()) return symtab->FindFirstSymbolWithNameAndType( @@ -1342,9 +1332,7 @@ void Module::SymbolIndicesToSymbolContextList( void Module::FindFunctionSymbols(ConstString name, uint32_t name_type_mask, SymbolContextList &sc_list) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "Module::FindSymbolsFunctions (name = %s, mask = 0x%8.8x)", + LLDB_SCOPED_TIMERF("Module::FindSymbolsFunctions (name = %s, mask = 0x%8.8x)", name.AsCString(), name_type_mask); if (Symtab *symtab = GetSymtab()) symtab->FindFunctionSymbols(name, name_type_mask, sc_list); @@ -1355,10 +1343,8 @@ void Module::FindSymbolsWithNameAndType(ConstString name, SymbolContextList &sc_list) { // No need to protect this call using m_mutex all other method calls are // already thread safe. - - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, "Module::FindSymbolsWithNameAndType (name = %s, type = %i)", + LLDB_SCOPED_TIMERF( + "Module::FindSymbolsWithNameAndType (name = %s, type = %i)", name.AsCString(), symbol_type); if (Symtab *symtab = GetSymtab()) { std::vector symbol_indexes; @@ -1372,10 +1358,7 @@ void Module::FindSymbolsMatchingRegExAndType(const RegularExpression ®ex, SymbolContextList &sc_list) { // No need to protect this call using m_mutex all other method calls are // already thread safe. 
- - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, + LLDB_SCOPED_TIMERF( "Module::FindSymbolsMatchingRegExAndType (regex = %s, type = %i)", regex.GetText().str().c_str(), symbol_type); if (Symtab *symtab = GetSymtab()) { diff --git a/lldb/source/Initialization/SystemInitializerCommon.cpp b/lldb/source/Initialization/SystemInitializerCommon.cpp index b29138c4884f6..d9f69f57703c7 100644 --- a/lldb/source/Initialization/SystemInitializerCommon.cpp +++ b/lldb/source/Initialization/SystemInitializerCommon.cpp @@ -131,8 +131,7 @@ llvm::Error SystemInitializerCommon::Initialize() { if (error) return error; - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); process_gdb_remote::ProcessGDBRemoteLog::Initialize(); @@ -147,8 +146,7 @@ llvm::Error SystemInitializerCommon::Initialize() { } void SystemInitializerCommon::Terminate() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); #if defined(_WIN32) ProcessWindowsLog::Terminate(); diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 4d33d17289074..f5303f6867cd0 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -224,8 +224,7 @@ bool CommandInterpreter::GetSpaceReplPrompts() const { } void CommandInterpreter::Initialize() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); CommandReturnObject result(m_debugger.GetUseColor()); @@ -487,8 +486,7 @@ const char *CommandInterpreter::ProcessEmbeddedScriptCommands(const char *arg) { m_command_dict[NAME] = std::make_shared(*this); void CommandInterpreter::LoadCommandDictionary() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); REGISTER_COMMAND_OBJECT("apropos", CommandObjectApropos); REGISTER_COMMAND_OBJECT("breakpoint", CommandObjectMultiwordBreakpoint); @@ -1649,9 +1647,7 @@ bool CommandInterpreter::HandleCommand(const char *command_line, command_line); LLDB_LOGF(log, "Processing command: %s", command_line); - - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "Handling command: %s.", command_line); + LLDB_SCOPED_TIMERF("Processing command: %s.", command_line); if (!no_context_switching) UpdateExecutionContext(override_context); diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp index 067f5e0883808..24ab9cc5f238f 100644 --- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp @@ -99,9 +99,7 @@ line_entry_helper(Target &target, const SymbolContext &sc, Symbol *symbol, CPPLanguageRuntime::LibCppStdFunctionCallableInfo CPPLanguageRuntime::FindLibCppStdFunctionCallableInfo( lldb::ValueObjectSP &valobj_sp) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "CPPLanguageRuntime::FindLibCppStdFunctionCallableInfo"); + LLDB_SCOPED_TIMER(); LibCppStdFunctionCallableInfo optional_info; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp 
b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 6afea05c4e745..4d23443d5d3b0 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -1831,10 +1831,9 @@ lldb::addr_t AppleObjCRuntimeV2::GetSharedCacheReadOnlyAddress() { } void AppleObjCRuntimeV2::UpdateISAToDescriptorMapIfNeeded() { - Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_TYPES)); + LLDB_SCOPED_TIMER(); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_TYPES)); // Else we need to check with our process to see when the map was updated. Process *process = GetProcess(); diff --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp index 83cf9f8bd2694..211eb9ce0d3aa 100644 --- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp +++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp @@ -300,9 +300,7 @@ ObjectContainer *ObjectContainerBSDArchive::CreateInstance( DataExtractor data; data.SetData(data_sp, data_offset, length); if (file && data_sp && ObjectContainerBSDArchive::MagicBytesMatch(data)) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, + LLDB_SCOPED_TIMERF( "ObjectContainerBSDArchive::CreateInstance (module = %s, file = " "%p, file_offset = 0x%8.8" PRIx64 ", file_size = 0x%8.8" PRIx64 ")", module_sp->GetFileSpec().GetPath().c_str(), diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index bca575b7f8842..82a08a235084d 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -576,9 +576,7 @@ size_t ObjectFileELF::GetModuleSpecifications( uint32_t core_notes_crc = 0; if (!gnu_debuglink_crc) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - lldb_private::Timer scoped_timer( - func_cat, + LLDB_SCOPED_TIMERF( "Calculating module crc32 %s with size %" PRIu64 " KiB", file.GetLastPathComponent().AsCString(), (length - file_offset) / 1024); diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index aafd5ab746b3c..463a2a52f5df6 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -2161,8 +2161,7 @@ ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, enum { DebugSymbols = true, NonDebugSymbols = false }; size_t ObjectFileMachO::ParseSymtab() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "ObjectFileMachO::ParseSymtab () module = %s", + LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", m_file.GetFilename().AsCString("")); ModuleSP module_sp(GetModule()); if (!module_sp) diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp index 6672363164b16..239b409ac695c 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp @@ -110,8 +110,7 @@ bool 
ScriptInterpreterLua::ExecuteOneLine(llvm::StringRef command, } void ScriptInterpreterLua::ExecuteInterpreterLoop() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); // At the moment, the only time the debugger does not have an input file // handle is when this is called directly from lua, in which case it is diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 5f950d42cac6f..6b53bd3a2edc4 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -1001,8 +1001,7 @@ bool ScriptInterpreterPythonImpl::ExecuteOneLine( } void ScriptInterpreterPythonImpl::ExecuteInterpreterLoop() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); Debugger &debugger = m_debugger; @@ -2220,8 +2219,7 @@ bool ScriptInterpreterPythonImpl::GetScriptedSummary( StructuredData::ObjectSP &callee_wrapper_sp, const TypeSummaryOptions &options, std::string &retval) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); if (!valobj.get()) { retval.assign(""); @@ -3240,8 +3238,7 @@ void ScriptInterpreterPythonImpl::InitializePrivate() { g_initialized = true; - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); // RAII-based initialization which correctly handles multiple-initialization, // version- specific differences among Python 2 and Python 3, and saving and diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index 7062c9bfae235..9f190fbcee87e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -78,8 +78,7 @@ void DWARFDebugAranges::AppendRange(dw_offset_t offset, dw_addr_t low_pc, } void DWARFDebugAranges::Sort(bool minimize) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION, + LLDB_SCOPED_TIMERF("%s this = %p", LLVM_PRETTY_FUNCTION, static_cast(this)); m_aranges.Sort(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 12e8b25130a9c..d0cfb5fca82fd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -49,9 +49,7 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() { if (m_first_die) return; // Already parsed - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%8.8x: DWARFUnit::ExtractUnitDIEIfNeeded()", - GetOffset()); + LLDB_SCOPED_TIMERF("%8.8x: DWARFUnit::ExtractUnitDIEIfNeeded()", GetOffset()); // Set the offset to that of the first DIE and calculate the start of the // next compilation unit header. 
@@ -145,9 +143,7 @@ DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=( void DWARFUnit::ExtractDIEsRWLocked() { llvm::sys::ScopedWriter first_die_lock(m_first_die_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%8.8x: DWARFUnit::ExtractDIEsIfNeeded()", - GetOffset()); + LLDB_SCOPED_TIMERF("%8.8x: DWARFUnit::ExtractDIEsIfNeeded()", GetOffset()); // Set the offset to that of the first DIE and calculate the start of the // next compilation unit header. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 0642e8a10f888..dda599baffebf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -28,8 +28,7 @@ void ManualDWARFIndex::Index() { SymbolFileDWARF &main_dwarf = *m_dwarf; m_dwarf = nullptr; - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%p", static_cast(&main_dwarf)); + LLDB_SCOPED_TIMERF("%p", static_cast(&main_dwarf)); DWARFDebugInfo &main_info = main_dwarf.DebugInfo(); SymbolFileDWARFDwo *dwp_dwarf = main_dwarf.GetDwpSymbolFile().get(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index b19881ff929fb..afad21af240cd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -623,8 +623,7 @@ DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() { DWARFDebugInfo &SymbolFileDWARF::DebugInfo() { llvm::call_once(m_info_once_flag, [&] { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION, + LLDB_SCOPED_TIMERF("%s this = %p", LLVM_PRETTY_FUNCTION, static_cast(this)); m_info = std::make_unique(*this, m_context); }); @@ -646,8 +645,7 @@ DWARFCompileUnit *SymbolFileDWARF::GetDWARFCompileUnit(CompileUnit *comp_unit) { DWARFDebugRanges *SymbolFileDWARF::GetDebugRanges() { if (!m_ranges) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION, + LLDB_SCOPED_TIMERF("%s this = %p", LLVM_PRETTY_FUNCTION, static_cast(this)); if (m_context.getOrLoadRangesData().GetByteSize() > 0) @@ -829,8 +827,7 @@ XcodeSDK SymbolFileDWARF::ParseXcodeSDK(CompileUnit &comp_unit) { } size_t SymbolFileDWARF::ParseFunctions(CompileUnit &comp_unit) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "SymbolFileDWARF::ParseFunctions"); + LLDB_SCOPED_TIMER(); std::lock_guard guard(GetModuleMutex()); DWARFUnit *dwarf_cu = GetDWARFCompileUnit(&comp_unit); if (!dwarf_cu) @@ -1839,9 +1836,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const Address &so_addr, SymbolContextItem resolve_scope, SymbolContext &sc) { std::lock_guard guard(GetModuleMutex()); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "SymbolFileDWARF::" + LLDB_SCOPED_TIMERF("SymbolFileDWARF::" "ResolveSymbolContext (so_addr = { " "section = %p, offset = 0x%" PRIx64 " }, resolve_scope = 0x%8.8x)", @@ -2277,8 +2272,7 @@ void SymbolFileDWARF::FindFunctions(ConstString name, bool include_inlines, SymbolContextList &sc_list) { std::lock_guard guard(GetModuleMutex()); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "SymbolFileDWARF::FindFunctions (name = '%s')", + 
LLDB_SCOPED_TIMERF("SymbolFileDWARF::FindFunctions (name = '%s')", name.AsCString()); // eFunctionNameTypeAuto should be pre-resolved by a call to @@ -2332,8 +2326,7 @@ void SymbolFileDWARF::FindFunctions(const RegularExpression ®ex, bool include_inlines, SymbolContextList &sc_list) { std::lock_guard guard(GetModuleMutex()); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "SymbolFileDWARF::FindFunctions (regex = '%s')", + LLDB_SCOPED_TIMERF("SymbolFileDWARF::FindFunctions (regex = '%s')", regex.GetText().str().c_str()); Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index 6515d78b8f236..fa24f975b0730 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -1013,9 +1013,7 @@ void SymbolFileDWARFDebugMap::FindFunctions( FunctionNameType name_type_mask, bool include_inlines, SymbolContextList &sc_list) { std::lock_guard guard(GetModuleMutex()); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "SymbolFileDWARFDebugMap::FindFunctions (name = %s)", + LLDB_SCOPED_TIMERF("SymbolFileDWARFDebugMap::FindFunctions (name = %s)", name.GetCString()); ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { @@ -1034,9 +1032,7 @@ void SymbolFileDWARFDebugMap::FindFunctions(const RegularExpression ®ex, bool include_inlines, SymbolContextList &sc_list) { std::lock_guard guard(GetModuleMutex()); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "SymbolFileDWARFDebugMap::FindFunctions (regex = '%s')", + LLDB_SCOPED_TIMERF("SymbolFileDWARFDebugMap::FindFunctions (regex = '%s')", regex.GetText().str().c_str()); ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { @@ -1055,9 +1051,7 @@ void SymbolFileDWARFDebugMap::GetTypes(SymbolContextScope *sc_scope, lldb::TypeClass type_mask, TypeList &type_list) { std::lock_guard guard(GetModuleMutex()); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "SymbolFileDWARFDebugMap::GetTypes (type_mask = 0x%8.8x)", + LLDB_SCOPED_TIMERF("SymbolFileDWARFDebugMap::GetTypes (type_mask = 0x%8.8x)", type_mask); SymbolFileDWARF *oso_dwarf = nullptr; diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index 89b07d22e350d..4df5140bd7e1a 100644 --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -81,8 +81,7 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, if (!fspec) fspec = obj_file->GetDebugLink().getValueOr(FileSpec()); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "SymbolVendorELF::CreateInstance (module = %s)", + LLDB_SCOPED_TIMERF("SymbolVendorELF::CreateInstance (module = %s)", module_sp->GetFileSpec().GetPath().c_str()); ModuleSpec module_spec; diff --git a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp index 1c09dabc5622f..67a1ef5e4e515 100644 --- a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp +++ b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp @@ -72,8 +72,7 @@ SymbolVendorWasm::CreateInstance(const lldb::ModuleSP &module_sp, 
lldb::eSectionTypeDWARFDebugInfo, true)) return nullptr; - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "SymbolVendorWasm::CreateInstance (module = %s)", + LLDB_SCOPED_TIMERF("SymbolVendorWasm::CreateInstance (module = %s)", module_sp->GetFileSpec().GetPath().c_str()); ModuleSpec module_spec; diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index 0c67bf5b702a4..822f0df4da37a 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -75,8 +75,7 @@ void CompileUnit::ForeachFunction( lldb::FunctionSP CompileUnit::FindFunction( llvm::function_ref matching_lambda) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "CompileUnit::FindFunction"); + LLDB_SCOPED_TIMER(); lldb::ModuleSP module = CalculateSymbolContextModule(); diff --git a/lldb/source/Symbol/DWARFCallFrameInfo.cpp b/lldb/source/Symbol/DWARFCallFrameInfo.cpp index 3111c33c71088..f0dce8f4793a1 100644 --- a/lldb/source/Symbol/DWARFCallFrameInfo.cpp +++ b/lldb/source/Symbol/DWARFCallFrameInfo.cpp @@ -419,8 +419,7 @@ void DWARFCallFrameInfo::GetFDEIndex() { if (m_fde_index_initialized) // if two threads hit the locker return; - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s - %s", LLVM_PRETTY_FUNCTION, + LLDB_SCOPED_TIMERF("%s - %s", LLVM_PRETTY_FUNCTION, m_objfile.GetFileSpec().GetFilename().AsCString("")); bool clear_address_zeroth_bit = false; diff --git a/lldb/source/Symbol/LocateSymbolFile.cpp b/lldb/source/Symbol/LocateSymbolFile.cpp index af4bbb6e53608..ba79bf661cd37 100644 --- a/lldb/source/Symbol/LocateSymbolFile.cpp +++ b/lldb/source/Symbol/LocateSymbolFile.cpp @@ -209,9 +209,7 @@ static FileSpec LocateExecutableSymbolFileDsym(const ModuleSpec &module_spec) { const ArchSpec *arch = module_spec.GetArchitecturePtr(); const UUID *uuid = module_spec.GetUUIDPtr(); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, + LLDB_SCOPED_TIMERF( "LocateExecutableSymbolFileDsym (file = %s, arch = %s, uuid = %p)", exec_fspec ? exec_fspec->GetFilename().AsCString("") : "", arch ? arch->GetArchitectureName() : "", (const void *)uuid); @@ -235,9 +233,8 @@ ModuleSpec Symbols::LocateExecutableObjectFile(const ModuleSpec &module_spec) { const FileSpec &exec_fspec = module_spec.GetFileSpec(); const ArchSpec *arch = module_spec.GetArchitecturePtr(); const UUID *uuid = module_spec.GetUUIDPtr(); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, "LocateExecutableObjectFile (file = %s, arch = %s, uuid = %p)", + LLDB_SCOPED_TIMERF( + "LocateExecutableObjectFile (file = %s, arch = %s, uuid = %p)", exec_fspec ? exec_fspec.GetFilename().AsCString("") : "", arch ? 
arch->GetArchitectureName() : "", (const void *)uuid); diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 79683284f36a7..ffe57121391f0 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -35,9 +35,7 @@ ObjectFile::FindPlugin(const lldb::ModuleSP &module_sp, const FileSpec *file, ObjectFileSP object_file_sp; if (module_sp) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, + LLDB_SCOPED_TIMERF( "ObjectFile::FindPlugin (module = %s, file = %p, file_offset = " "0x%8.8" PRIx64 ", file_size = 0x%8.8" PRIx64 ")", module_sp->GetFileSpec().GetPath().c_str(), @@ -174,9 +172,7 @@ ObjectFileSP ObjectFile::FindPlugin(const lldb::ModuleSP &module_sp, ObjectFileSP object_file_sp; if (module_sp) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "ObjectFile::FindPlugin (module = " + LLDB_SCOPED_TIMERF("ObjectFile::FindPlugin (module = " "%s, process = %p, header_addr = " "0x%" PRIx64 ")", module_sp->GetFileSpec().GetPath().c_str(), diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 3f697e6076b37..7f84243347089 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -251,8 +251,7 @@ void Symtab::InitNameIndexes() { // Protected function, no need to lock mutex... if (!m_name_indexes_computed) { m_name_indexes_computed = true; - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); // Create the name index vector to be able to quickly search by name const size_t num_symbols = m_symbols.size(); m_name_to_index.Reserve(num_symbols); @@ -411,9 +410,8 @@ void Symtab::PreloadSymbols() { void Symtab::AppendSymbolNamesToMap(const IndexCollection &indexes, bool add_demangled, bool add_mangled, NameToIndexMap &name_to_index_map) const { + LLDB_SCOPED_TIMER(); if (add_demangled || add_mangled) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION); std::lock_guard guard(m_mutex); // Create the name index vector to be able to quickly search by name @@ -566,9 +564,7 @@ struct SymbolIndexComparator { void Symtab::SortSymbolIndexesByValue(std::vector &indexes, bool remove_duplicates) const { std::lock_guard guard(m_mutex); - - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); // No need to sort if we have zero or one items... 
if (indexes.size() <= 1) return; @@ -594,8 +590,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); if (symbol_name) { if (!m_name_indexes_computed) InitNameIndexes(); @@ -611,8 +606,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); if (symbol_name) { const size_t old_size = indexes.size(); if (!m_name_indexes_computed) @@ -741,8 +735,7 @@ Symtab::FindAllSymbolsWithNameAndType(ConstString name, std::vector &symbol_indexes) { std::lock_guard guard(m_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); // Initialize all of the lookup by name indexes before converting NAME to a // uniqued string NAME_STR below. if (!m_name_indexes_computed) @@ -760,8 +753,7 @@ void Symtab::FindAllSymbolsWithNameAndType( Visibility symbol_visibility, std::vector &symbol_indexes) { std::lock_guard guard(m_mutex); - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); // Initialize all of the lookup by name indexes before converting NAME to a // uniqued string NAME_STR below. if (!m_name_indexes_computed) @@ -790,9 +782,7 @@ Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name, Debug symbol_debug_type, Visibility symbol_visibility) { std::lock_guard guard(m_mutex); - - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION); + LLDB_SCOPED_TIMER(); if (!m_name_indexes_computed) InitNameIndexes(); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index bee87eb1b6c73..736864e021bbd 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -1400,9 +1400,7 @@ void Target::SetExecutableModule(ModuleSP &executable_sp, ClearModules(false); if (executable_sp) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, - "Target::SetExecutableModule (executable = '%s')", + LLDB_SCOPED_TIMERF("Target::SetExecutableModule (executable = '%s')", executable_sp->GetFileSpec().GetPath().c_str()); const bool notify = true; diff --git a/lldb/source/Target/TargetList.cpp b/lldb/source/Target/TargetList.cpp index 5bb6ca2a73e96..1e5856dd0b221 100644 --- a/lldb/source/Target/TargetList.cpp +++ b/lldb/source/Target/TargetList.cpp @@ -286,10 +286,9 @@ Status TargetList::CreateTargetInternal(Debugger &debugger, LoadDependentFiles load_dependent_files, lldb::PlatformSP &platform_sp, lldb::TargetSP &target_sp) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer( - func_cat, "TargetList::CreateTarget (file = '%s', arch = '%s')", - user_exe_path.str().c_str(), specified_arch.GetArchitectureName()); + LLDB_SCOPED_TIMERF("TargetList::CreateTarget (file = '%s', arch = '%s')", + user_exe_path.str().c_str(), + specified_arch.GetArchitectureName()); Status error; const bool is_dummy_target = false; diff --git a/lldb/tools/lldb-test/SystemInitializerTest.cpp b/lldb/tools/lldb-test/SystemInitializerTest.cpp index 10b90cdc64096..2f6eb8e21a21b 100644 --- 
a/lldb/tools/lldb-test/SystemInitializerTest.cpp +++ b/lldb/tools/lldb-test/SystemInitializerTest.cpp @@ -54,9 +54,6 @@ llvm::Error SystemInitializerTest::Initialize() { } void SystemInitializerTest::Terminate() { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION); - Debugger::SettingsTerminate(); // Terminate and unload and loaded system or user LLDB plug-ins From be85b3e4324b5a03abd929815b7fc1c2184db97a Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 22 Dec 2020 17:05:02 +0000 Subject: [PATCH 103/378] Fix some misnamed variables in sve-fixed-length-int-minmax.ll. --- .../AArch64/sve-fixed-length-int-minmax.ll | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll index cc9e172de5f88..e94abe815f3c3 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll @@ -69,14 +69,14 @@ define void @smax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 ; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 -; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b -; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -442,14 +442,14 @@ define void @smin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 ; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 -; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b -; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b %res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2) @@ -814,14 +814,14 @@ define void @umax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 ; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 -; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b -; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -1187,14 +1187,14 @@ define void @umin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 ; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 -; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b -; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b %res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2) From 5d10b8ad595da87bec8c66ad70a8daf86cd9266b Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 22 Dec 2020 17:18:59 +0000 Subject: [PATCH 104/378] [ADT] Add resize_for_overwrite method to SmallVector. Analagous to the std::make_(unqiue|shared)_for_overwrite added in c++20. If T is POD, and the container gets larger, any new values added wont be initialized. This is useful when using SmallVector as a buffer where its planned to overwrite any potential new values added. If T is not POD, `new (Storage) T` functions identically to `new (Storage) T()` so this will function identically to `resize(size_type)`. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D93532 --- llvm/include/llvm/ADT/SmallVector.h | 14 ++++++++++++-- llvm/unittests/ADT/SmallVectorTest.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 0a0f09c68060b..3bbde2d9c0fbc 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -460,7 +460,8 @@ class SmallVectorImpl : public SmallVectorTemplateBase { this->Size = 0; } - void resize(size_type N) { +private: + template void resizeImpl(size_type N) { if (N < this->size()) { this->destroy_range(this->begin()+N, this->end()); this->set_size(N); @@ -468,11 +469,20 @@ class SmallVectorImpl : public SmallVectorTemplateBase { if (this->capacity() < N) this->grow(N); for (auto I = this->end(), E = this->begin() + N; I != E; ++I) - new (&*I) T(); + if (ForOverwrite) + new (&*I) T; + else + new (&*I) T(); this->set_size(N); } } +public: + void resize(size_type N) { resizeImpl(N); } + + /// Like resize, but \ref T is POD, the new values won't be initialized. + void resize_for_overwrite(size_type N) { resizeImpl(N); } + void resize(size_type N, const T &NV) { if (N == this->size()) return; diff --git a/llvm/unittests/ADT/SmallVectorTest.cpp b/llvm/unittests/ADT/SmallVectorTest.cpp index 957412f3083f3..e02c3e709667e 100644 --- a/llvm/unittests/ADT/SmallVectorTest.cpp +++ b/llvm/unittests/ADT/SmallVectorTest.cpp @@ -341,6 +341,31 @@ TYPED_TEST(SmallVectorTest, ResizeFillTest) { this->assertValuesInOrder(this->theVector, 3u, 77, 77, 77); } +TEST(SmallVectorTest, ResizeForOverwrite) { + { + // Heap allocated storage. 
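A rough, illustrative sketch of the buffer-reuse pattern the commit message above describes, assuming LLVM's ADT headers are available; fillBuffer is a hypothetical producer, not an LLVM API:

#include "llvm/ADT/SmallVector.h"
#include <cstddef>
#include <cstdio>
#include <cstring>

// Hypothetical producer: fills Buf[0..Len) and reports how many bytes it wrote.
static size_t fillBuffer(char *Buf, size_t Len) {
  std::memset(Buf, 'x', Len);
  return Len;
}

int main() {
  llvm::SmallVector<char, 32> Buffer;
  const size_t Needed = 4096;
  // char is trivial, so the newly added tail is not value-initialised; that is
  // safe here because fillBuffer() overwrites every element immediately.
  Buffer.resize_for_overwrite(Needed);
  const size_t Written = fillBuffer(Buffer.data(), Buffer.size());
  // Shrinking only destroys elements, so a plain resize() is fine afterwards.
  Buffer.resize(Written);
  std::printf("buffer holds %zu bytes\n", Buffer.size());
  return 0;
}

The unit tests added below check the same distinction: after resize_for_overwrite a trivially-constructible element keeps whatever bytes were already in the storage, while plain resize value-initialises it.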
+ SmallVector V; + V.push_back(5); + V.pop_back(); + V.resize_for_overwrite(V.size() + 1); + EXPECT_EQ(5, V.back()); + V.pop_back(); + V.resize(V.size() + 1); + EXPECT_EQ(0, V.back()); + } + { + // Inline storage. + SmallVector V; + V.push_back(5); + V.pop_back(); + V.resize_for_overwrite(V.size() + 1); + EXPECT_EQ(5, V.back()); + V.pop_back(); + V.resize(V.size() + 1); + EXPECT_EQ(0, V.back()); + } +} + // Overflow past fixed size. TYPED_TEST(SmallVectorTest, OverflowTest) { SCOPED_TRACE("OverflowTest"); From f106b281be24df4b5ed4553c3c09c885610cd2b8 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 22 Dec 2020 09:47:11 -0800 Subject: [PATCH 105/378] [tests] precommit a test mentioned in review for D93317 --- .../Transforms/LoopVectorize/loop-form.ll | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 298143ba726c3..72f2215bb934b 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -338,3 +338,91 @@ if.end: if.end2: ret i32 1 } + +define i32 @multiple_latch1(i16* %p) { +; CHECK-LABEL: @multiple_latch1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] +; CHECK: for.second: +; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] +; CHECK-NEXT: store i16 0, i16* [[B]], align 4 +; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 +; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] +; CHECK: for.body.backedge: +; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %for.body + +for.body: + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge] + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, 16 + br i1 %cmp, label %for.body.backedge, label %for.second + +for.second: + %iprom = sext i32 %i.02 to i64 + %b = getelementptr inbounds i16, i16* %p, i64 %iprom + store i16 0, i16* %b, align 4 + %cmps = icmp sgt i32 %inc, 16 + br i1 %cmps, label %for.body.backedge, label %for.end + +for.body.backedge: + br label %for.body + +for.end: + ret i32 0 +} + + +; two back branches - loop simplify with convert this to the same form +; as previous before vectorizer sees it, but show that. 
+define i32 @multiple_latch2(i16* %p) { +; CHECK-LABEL: @multiple_latch2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] +; CHECK: for.body.backedge: +; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK: for.second: +; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] +; CHECK-NEXT: store i16 0, i16* [[B]], align 4 +; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 +; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %for.body + +for.body: + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second] + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, 16 + br i1 %cmp, label %for.body, label %for.second + +for.second: + %iprom = sext i32 %i.02 to i64 + %b = getelementptr inbounds i16, i16* %p, i64 %iprom + store i16 0, i16* %b, align 4 + %cmps = icmp sgt i32 %inc, 16 + br i1 %cmps, label %for.body, label %for.end + +for.end: + ret i32 0 +} + +declare void @foo() From ac90bbc9cb8b905e4a8e7c9d2924a4d426c690aa Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 22 Dec 2020 17:48:01 +0000 Subject: [PATCH 106/378] [LoopDeletion] Add test case where outer loop needs to be deleted. In the test case @test1, the inner loop cannot be removed, because it has a live-out value. But the outer loop is a no-op and can be removed. --- .../LoopDeletion/noop-loops-with-subloops.ll | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 llvm/test/Transforms/LoopDeletion/noop-loops-with-subloops.ll diff --git a/llvm/test/Transforms/LoopDeletion/noop-loops-with-subloops.ll b/llvm/test/Transforms/LoopDeletion/noop-loops-with-subloops.ll new file mode 100644 index 0000000000000..464c12f453a70 --- /dev/null +++ b/llvm/test/Transforms/LoopDeletion/noop-loops-with-subloops.ll @@ -0,0 +1,172 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-deletion -verify-dom-info -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" + +%pair_t = type { i64, i64 } + +; The loop %inner cannot be removed, because it has outgoing values. But the +; outer loop is a no-op and can be removed. 
+define void @test1(i64 %N, i64 %M, %pair_t* %ptr) willreturn { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [[PAIR_T:%.*]], %pair_t* [[PTR:%.*]], i64 [[INNER_IV]] +; CHECK-NEXT: [[P:%.*]] = load [[PAIR_T]], %pair_t* [[GEP]], align 4 +; CHECK-NEXT: [[V_0:%.*]] = extractvalue [[PAIR_T]] [[P]], 0 +; CHECK-NEXT: [[V_1:%.*]] = extractvalue [[PAIR_T]] [[P]], 1 +; CHECK-NEXT: [[INNER_EC:%.*]] = icmp ult i64 [[V_0]], [[V_1]] +; CHECK-NEXT: [[INNER_IV_NEXT]] = add i64 [[INNER_IV]], 1 +; CHECK-NEXT: br i1 [[INNER_EC]], label [[OUTER_LATCH]], label [[INNER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i64 [ [[V_1]], [[INNER]] ] +; CHECK-NEXT: [[OUTER_EC:%.*]] = icmp ult i64 [[OUTER_IV]], [[LCSSA]] +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 +; CHECK-NEXT: br i1 [[OUTER_EC]], label [[EXIT:%.*]], label [[OUTER_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + + br label %inner + +inner: + %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %gep = getelementptr %pair_t, %pair_t* %ptr, i64 %inner.iv + %p = load %pair_t, %pair_t* %gep + %v.0 = extractvalue %pair_t %p, 0 + %v.1 = extractvalue %pair_t %p, 1 + %inner.ec = icmp ult i64 %v.0, %v.1 + %inner.iv.next = add i64 %inner.iv, 1 + br i1 %inner.ec, label %outer.latch, label %inner + +outer.latch: + %lcssa = phi i64 [ %v.1, %inner ] + %outer.ec = icmp ult i64 %outer.iv, %lcssa + %outer.iv.next = add i64 %outer.iv, 1 + br i1 %outer.ec, label %exit, label %outer.header + +exit: + ret void +} + +declare void @sideeffect() + +; Same as @test1, but with a call in the outer loop. Nothing can be deleted. 
+define void @test2_sideeffect_in_outer(i64 %N, i64 %M, %pair_t* %ptr) willreturn { +; CHECK-LABEL: @test2_sideeffect_in_outer( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [[PAIR_T:%.*]], %pair_t* [[PTR:%.*]], i64 [[INNER_IV]] +; CHECK-NEXT: [[P:%.*]] = load [[PAIR_T]], %pair_t* [[GEP]], align 4 +; CHECK-NEXT: [[V_0:%.*]] = extractvalue [[PAIR_T]] [[P]], 0 +; CHECK-NEXT: [[V_1:%.*]] = extractvalue [[PAIR_T]] [[P]], 1 +; CHECK-NEXT: [[INNER_EC:%.*]] = icmp ult i64 [[V_0]], [[V_1]] +; CHECK-NEXT: [[INNER_IV_NEXT]] = add i64 [[INNER_IV]], 1 +; CHECK-NEXT: br i1 [[INNER_EC]], label [[OUTER_LATCH]], label [[INNER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i64 [ [[V_1]], [[INNER]] ] +; CHECK-NEXT: [[OUTER_EC:%.*]] = icmp ult i64 [[OUTER_IV]], [[LCSSA]] +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: br i1 [[OUTER_EC]], label [[EXIT:%.*]], label [[OUTER_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + + br label %inner + +inner: + %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %gep = getelementptr %pair_t, %pair_t* %ptr, i64 %inner.iv + %p = load %pair_t, %pair_t* %gep + %v.0 = extractvalue %pair_t %p, 0 + %v.1 = extractvalue %pair_t %p, 1 + %inner.ec = icmp ult i64 %v.0, %v.1 + %inner.iv.next = add i64 %inner.iv, 1 + br i1 %inner.ec, label %outer.latch, label %inner + +outer.latch: + %lcssa = phi i64 [ %v.1, %inner ] + %outer.ec = icmp ult i64 %outer.iv, %lcssa + %outer.iv.next = add i64 %outer.iv, 1 + call void @sideeffect() + br i1 %outer.ec, label %exit, label %outer.header + +exit: + ret void +} + +; Same as @test1, but with a call in the inner loop. Nothing can be deleted. 
+define void @test3_sideeffect_in_inner(i64 %N, i64 %M, %pair_t* %ptr) willreturn { +; CHECK-LABEL: @test3_sideeffect_in_inner( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [[PAIR_T:%.*]], %pair_t* [[PTR:%.*]], i64 [[INNER_IV]] +; CHECK-NEXT: [[P:%.*]] = load [[PAIR_T]], %pair_t* [[GEP]], align 4 +; CHECK-NEXT: [[V_0:%.*]] = extractvalue [[PAIR_T]] [[P]], 0 +; CHECK-NEXT: [[V_1:%.*]] = extractvalue [[PAIR_T]] [[P]], 1 +; CHECK-NEXT: [[INNER_EC:%.*]] = icmp ult i64 [[V_0]], [[V_1]] +; CHECK-NEXT: [[INNER_IV_NEXT]] = add i64 [[INNER_IV]], 1 +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: br i1 [[INNER_EC]], label [[OUTER_LATCH]], label [[INNER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i64 [ [[V_1]], [[INNER]] ] +; CHECK-NEXT: [[OUTER_EC:%.*]] = icmp ult i64 [[OUTER_IV]], [[LCSSA]] +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 +; CHECK-NEXT: br i1 [[OUTER_EC]], label [[EXIT:%.*]], label [[OUTER_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + + br label %inner + +inner: + %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %gep = getelementptr %pair_t, %pair_t* %ptr, i64 %inner.iv + %p = load %pair_t, %pair_t* %gep + %v.0 = extractvalue %pair_t %p, 0 + %v.1 = extractvalue %pair_t %p, 1 + %inner.ec = icmp ult i64 %v.0, %v.1 + %inner.iv.next = add i64 %inner.iv, 1 + call void @sideeffect() + br i1 %inner.ec, label %outer.latch, label %inner + +outer.latch: + %lcssa = phi i64 [ %v.1, %inner ] + %outer.ec = icmp ult i64 %outer.iv, %lcssa + %outer.iv.next = add i64 %outer.iv, 1 + br i1 %outer.ec, label %exit, label %outer.header + +exit: + ret void +} From f5071489ea8cf2771d7375534c122467a000b356 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 22 Dec 2020 18:06:19 +0000 Subject: [PATCH 107/378] [ADT] Fix some tests after 5d10b8ad Some bots were failing due to signed/unsigned comparison. --- llvm/unittests/ADT/SmallVectorTest.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/unittests/ADT/SmallVectorTest.cpp b/llvm/unittests/ADT/SmallVectorTest.cpp index e02c3e709667e..d97ab577524f4 100644 --- a/llvm/unittests/ADT/SmallVectorTest.cpp +++ b/llvm/unittests/ADT/SmallVectorTest.cpp @@ -345,24 +345,24 @@ TEST(SmallVectorTest, ResizeForOverwrite) { { // Heap allocated storage. SmallVector V; - V.push_back(5); + V.push_back(5U); V.pop_back(); - V.resize_for_overwrite(V.size() + 1); - EXPECT_EQ(5, V.back()); + V.resize_for_overwrite(V.size() + 1U); + EXPECT_EQ(5U, V.back()); V.pop_back(); V.resize(V.size() + 1); - EXPECT_EQ(0, V.back()); + EXPECT_EQ(0U, V.back()); } { // Inline storage. 
SmallVector V; - V.push_back(5); + V.push_back(5U); V.pop_back(); - V.resize_for_overwrite(V.size() + 1); - EXPECT_EQ(5, V.back()); + V.resize_for_overwrite(V.size() + 1U); + EXPECT_EQ(5U, V.back()); V.pop_back(); V.resize(V.size() + 1); - EXPECT_EQ(0, V.back()); + EXPECT_EQ(0U, V.back()); } } From e17a00fc87bc163cc2438ce10faca51d94b91ab3 Mon Sep 17 00:00:00 2001 From: Andy Yankovsky Date: Tue, 22 Dec 2020 10:06:46 -0800 Subject: [PATCH 108/378] [lldb] Add SBType::IsScopedEnumerationType method Add a method to check if the type is a scoped enumeration (i.e. "enum class/struct"). Differential revision: https://reviews.llvm.org/D93690 --- lldb/bindings/interface/SBType.i | 3 +++ lldb/include/lldb/API/SBType.h | 2 ++ lldb/include/lldb/Symbol/CompilerType.h | 2 ++ lldb/include/lldb/Symbol/TypeSystem.h | 2 ++ lldb/source/API/SBType.cpp | 9 +++++++++ .../Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 14 ++++++++++++++ .../Plugins/TypeSystem/Clang/TypeSystemClang.h | 2 ++ lldb/source/Symbol/CompilerType.cpp | 6 ++++++ lldb/test/API/python_api/type/TestTypeList.py | 10 ++++++++++ lldb/test/API/python_api/type/main.cpp | 5 +++++ 10 files changed, 55 insertions(+) diff --git a/lldb/bindings/interface/SBType.i b/lldb/bindings/interface/SBType.i index fd2dda1884549..b65eddb5fe29d 100644 --- a/lldb/bindings/interface/SBType.i +++ b/lldb/bindings/interface/SBType.i @@ -220,6 +220,9 @@ public: bool IsAnonymousType (); + bool + IsScopedEnumerationType (); + lldb::SBType GetPointerType(); diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h index 5f487aa5d2581..9ac385c492ed5 100644 --- a/lldb/include/lldb/API/SBType.h +++ b/lldb/include/lldb/API/SBType.h @@ -131,6 +131,8 @@ class SBType { bool IsAnonymousType(); + bool IsScopedEnumerationType(); + lldb::SBType GetPointerType(); lldb::SBType GetPointeeType(); diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h index f1cde0ac30849..1e0f520ab9596 100644 --- a/lldb/include/lldb/Symbol/CompilerType.h +++ b/lldb/include/lldb/Symbol/CompilerType.h @@ -82,6 +82,8 @@ class CompilerType { bool IsAnonymousType() const; + bool IsScopedEnumerationType() const; + bool IsBeingDefined() const; bool IsCharType() const; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index 4c51d290ad2c5..b8393b9c39e1d 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -175,6 +175,8 @@ class TypeSystem : public PluginInterface { return false; } + virtual bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) = 0; + virtual bool IsPossibleDynamicType(lldb::opaque_compiler_type_t type, CompilerType *target_type, // Can pass NULL bool check_cplusplus, bool check_objc) = 0; diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp index 0a99ac0f22923..7d8d4cfeef4f8 100644 --- a/lldb/source/API/SBType.cpp +++ b/lldb/source/API/SBType.cpp @@ -271,6 +271,14 @@ bool SBType::IsAnonymousType() { return m_opaque_sp->GetCompilerType(true).IsAnonymousType(); } +bool SBType::IsScopedEnumerationType() { + LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsScopedEnumerationType); + + if (!IsValid()) + return false; + return m_opaque_sp->GetCompilerType(true).IsScopedEnumerationType(); +} + lldb::SBType SBType::GetFunctionReturnType() { LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetFunctionReturnType); @@ -935,6 +943,7 @@ void RegisterMethods(Registry &R) { LLDB_REGISTER_METHOD(bool, SBType, IsPolymorphicClass, ()); 
LLDB_REGISTER_METHOD(bool, SBType, IsTypedefType, ()); LLDB_REGISTER_METHOD(bool, SBType, IsAnonymousType, ()); + LLDB_REGISTER_METHOD(bool, SBType, IsScopedEnumerationType, ()); LLDB_REGISTER_METHOD(lldb::SBType, SBType, GetFunctionReturnType, ()); LLDB_REGISTER_METHOD(lldb::SBTypeList, SBType, GetFunctionArgumentTypes, ()); diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 894faa8474507..d1a9e9387292a 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -3147,6 +3147,20 @@ bool TypeSystemClang::IsEnumerationType(lldb::opaque_compiler_type_t type, return false; } +bool TypeSystemClang::IsScopedEnumerationType( + lldb::opaque_compiler_type_t type) { + if (type) { + const clang::EnumType *enum_type = llvm::dyn_cast( + GetCanonicalQualType(type)->getCanonicalTypeInternal()); + + if (enum_type) { + return enum_type->isScopedEnumeralType(); + } + } + + return false; +} + bool TypeSystemClang::IsPointerType(lldb::opaque_compiler_type_t type, CompilerType *pointee_type) { if (type) { diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 484c251aa00e1..7b16579cf240d 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -600,6 +600,8 @@ class TypeSystemClang : public TypeSystem { bool IsEnumerationType(lldb::opaque_compiler_type_t type, bool &is_signed) override; + bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) override; + static bool IsObjCClassType(const CompilerType &type); static bool IsObjCClassTypeAndHasIVars(const CompilerType &type, diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index c2f68283f6036..2c5910a683fac 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -40,6 +40,12 @@ bool CompilerType::IsAnonymousType() const { return false; } +bool CompilerType::IsScopedEnumerationType() const { + if (IsValid()) + return m_type_system->IsScopedEnumerationType(m_type); + return false; +} + bool CompilerType::IsArrayType(CompilerType *element_type_ptr, uint64_t *size, bool *is_incomplete) const { if (IsValid()) diff --git a/lldb/test/API/python_api/type/TestTypeList.py b/lldb/test/API/python_api/type/TestTypeList.py index 901ddc62f4e56..6ed6ca42727d2 100644 --- a/lldb/test/API/python_api/type/TestTypeList.py +++ b/lldb/test/API/python_api/type/TestTypeList.py @@ -144,3 +144,13 @@ def test(self): myint_type = target.FindFirstType('myint') self.DebugSBType(myint_type) self.assertTrue(myint_arr_element_type == myint_type) + + # Test enum methods. 
+ enum_type = target.FindFirstType('EnumType') + self.assertTrue(enum_type) + self.DebugSBType(enum_type) + self.assertFalse(enum_type.IsScopedEnumerationType()) + scoped_enum_type = target.FindFirstType('ScopedEnumType') + self.assertTrue(scoped_enum_type) + self.DebugSBType(scoped_enum_type) + self.assertTrue(scoped_enum_type.IsScopedEnumerationType()) diff --git a/lldb/test/API/python_api/type/main.cpp b/lldb/test/API/python_api/type/main.cpp index 13e6bbc127ba6..5b96f47ea3664 100644 --- a/lldb/test/API/python_api/type/main.cpp +++ b/lldb/test/API/python_api/type/main.cpp @@ -29,6 +29,8 @@ class Task { {} }; +enum EnumType {}; +enum class ScopedEnumType {}; int main (int argc, char const *argv[]) { @@ -59,5 +61,8 @@ int main (int argc, char const *argv[]) typedef int myint; myint myint_arr[] = {1, 2, 3}; + EnumType enum_type; + ScopedEnumType scoped_enum_type; + return 0; // Break at this line } From 1432ae57bf6e4022b6f4541c9225674ee6b19c23 Mon Sep 17 00:00:00 2001 From: Andy Yankovsky Date: Tue, 22 Dec 2020 10:07:44 -0800 Subject: [PATCH 109/378] [lldb] Add SBType::GetEnumerationIntegerType method Add a method for getting the enumeration underlying type. Differential revision: https://reviews.llvm.org/D93696 --- lldb/bindings/interface/SBType.i | 3 +++ lldb/include/lldb/API/SBType.h | 3 +++ lldb/include/lldb/Symbol/CompilerType.h | 2 ++ lldb/include/lldb/Symbol/TypeSystem.h | 3 +++ lldb/source/API/SBType.cpp | 11 +++++++++++ .../Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 7 +++++++ .../Plugins/TypeSystem/Clang/TypeSystemClang.h | 3 +++ lldb/source/Symbol/CompilerType.cpp | 6 ++++++ lldb/test/API/python_api/type/TestTypeList.py | 13 +++++++++++++ lldb/test/API/python_api/type/main.cpp | 2 ++ 10 files changed, 53 insertions(+) diff --git a/lldb/bindings/interface/SBType.i b/lldb/bindings/interface/SBType.i index b65eddb5fe29d..2d9a4a4d11d1d 100644 --- a/lldb/bindings/interface/SBType.i +++ b/lldb/bindings/interface/SBType.i @@ -244,6 +244,9 @@ public: lldb::SBType GetCanonicalType(); + lldb::SBType + GetEnumerationIntegerType(); + lldb::SBType GetArrayElementType (); diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h index 9ac385c492ed5..529b4d0eeffc4 100644 --- a/lldb/include/lldb/API/SBType.h +++ b/lldb/include/lldb/API/SBType.h @@ -152,6 +152,9 @@ class SBType { lldb::SBType GetVectorElementType(); lldb::SBType GetCanonicalType(); + + lldb::SBType GetEnumerationIntegerType(); + // Get the "lldb::BasicType" enumeration for a type. If a type is not a basic // type eBasicTypeInvalid will be returned lldb::BasicType GetBasicType(); diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h index 1e0f520ab9596..5a0e8e57200df 100644 --- a/lldb/include/lldb/Symbol/CompilerType.h +++ b/lldb/include/lldb/Symbol/CompilerType.h @@ -187,6 +187,8 @@ class CompilerType { CompilerType GetFullyUnqualifiedType() const; + CompilerType GetEnumerationIntegerType() const; + /// Returns -1 if this isn't a function of if the function doesn't /// have a prototype Returns a value >= 0 if there is a prototype. 
int GetFunctionArgumentCount() const; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index b8393b9c39e1d..1fad8f61ac370 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -227,6 +227,9 @@ class TypeSystem : public PluginInterface { virtual CompilerType GetCanonicalType(lldb::opaque_compiler_type_t type) = 0; + virtual CompilerType + GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) = 0; + // Returns -1 if this isn't a function of if the function doesn't have a // prototype Returns a value >= 0 if there is a prototype. virtual int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) = 0; diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp index 7d8d4cfeef4f8..550c4b065914a 100644 --- a/lldb/source/API/SBType.cpp +++ b/lldb/source/API/SBType.cpp @@ -344,6 +344,16 @@ lldb::SBType SBType::GetCanonicalType() { return LLDB_RECORD_RESULT(SBType()); } +SBType SBType::GetEnumerationIntegerType() { + LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetEnumerationIntegerType); + + if (IsValid()) { + return LLDB_RECORD_RESULT( + SBType(m_opaque_sp->GetCompilerType(true).GetEnumerationIntegerType())); + } + return LLDB_RECORD_RESULT(SBType()); +} + lldb::BasicType SBType::GetBasicType() { LLDB_RECORD_METHOD_NO_ARGS(lldb::BasicType, SBType, GetBasicType); @@ -952,6 +962,7 @@ void RegisterMethods(Registry &R) { GetMemberFunctionAtIndex, (uint32_t)); LLDB_REGISTER_METHOD(lldb::SBType, SBType, GetUnqualifiedType, ()); LLDB_REGISTER_METHOD(lldb::SBType, SBType, GetCanonicalType, ()); + LLDB_REGISTER_METHOD(lldb::SBType, SBType, GetEnumerationIntegerType, ()); LLDB_REGISTER_METHOD(lldb::BasicType, SBType, GetBasicType, ()); LLDB_REGISTER_METHOD(lldb::SBType, SBType, GetBasicType, (lldb::BasicType)); LLDB_REGISTER_METHOD(uint32_t, SBType, GetNumberOfDirectBaseClasses, ()); diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index d1a9e9387292a..4f55cf7cfa79a 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -4195,6 +4195,13 @@ TypeSystemClang::GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) { return CompilerType(); } +CompilerType +TypeSystemClang::GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) { + if (type) + return GetEnumerationIntegerType(GetType(GetCanonicalQualType(type))); + return CompilerType(); +} + int TypeSystemClang::GetFunctionArgumentCount( lldb::opaque_compiler_type_t type) { if (type) { diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 7b16579cf240d..d24c5958204ff 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -678,6 +678,9 @@ class TypeSystemClang : public TypeSystem { CompilerType GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) override; + CompilerType + GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) override; + // Returns -1 if this isn't a function of if the function doesn't have a // prototype Returns a value >= 0 if there is a prototype. 
int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) override; diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index 2c5910a683fac..4f0c3b366af5b 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -350,6 +350,12 @@ CompilerType CompilerType::GetFullyUnqualifiedType() const { return CompilerType(); } +CompilerType CompilerType::GetEnumerationIntegerType() const { + if (IsValid()) + return m_type_system->GetEnumerationIntegerType(m_type); + return CompilerType(); +} + int CompilerType::GetFunctionArgumentCount() const { if (IsValid()) { return m_type_system->GetFunctionArgumentCount(m_type); diff --git a/lldb/test/API/python_api/type/TestTypeList.py b/lldb/test/API/python_api/type/TestTypeList.py index 6ed6ca42727d2..ff560de2f96f1 100644 --- a/lldb/test/API/python_api/type/TestTypeList.py +++ b/lldb/test/API/python_api/type/TestTypeList.py @@ -150,7 +150,20 @@ def test(self): self.assertTrue(enum_type) self.DebugSBType(enum_type) self.assertFalse(enum_type.IsScopedEnumerationType()) + scoped_enum_type = target.FindFirstType('ScopedEnumType') self.assertTrue(scoped_enum_type) self.DebugSBType(scoped_enum_type) self.assertTrue(scoped_enum_type.IsScopedEnumerationType()) + int_scoped_enum_type = scoped_enum_type.GetEnumerationIntegerType() + self.assertTrue(int_scoped_enum_type) + self.DebugSBType(int_scoped_enum_type) + self.assertEquals(int_scoped_enum_type.GetName(), 'int') + + enum_uchar = target.FindFirstType('EnumUChar') + self.assertTrue(enum_uchar) + self.DebugSBType(enum_uchar) + int_enum_uchar = enum_uchar.GetEnumerationIntegerType() + self.assertTrue(int_enum_uchar) + self.DebugSBType(int_enum_uchar) + self.assertEquals(int_enum_uchar.GetName(), 'unsigned char') diff --git a/lldb/test/API/python_api/type/main.cpp b/lldb/test/API/python_api/type/main.cpp index 5b96f47ea3664..b1ef625283855 100644 --- a/lldb/test/API/python_api/type/main.cpp +++ b/lldb/test/API/python_api/type/main.cpp @@ -31,6 +31,7 @@ class Task { enum EnumType {}; enum class ScopedEnumType {}; +enum class EnumUChar : unsigned char {}; int main (int argc, char const *argv[]) { @@ -63,6 +64,7 @@ int main (int argc, char const *argv[]) EnumType enum_type; ScopedEnumType scoped_enum_type; + EnumUChar scoped_enum_type_uchar; return 0; // Break at this line } From 612ddc3117ce7715a3634fa0e0c5c6cdd3619d6b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 22 Dec 2020 13:14:34 -0500 Subject: [PATCH 110/378] [OpenMP][Docs] Updated the faq about building an OpenMP offloading capable compiler After some issues about building runtimes along with LLVM were fixed, building an OpenMP offloading capable compiler is pretty simple. This patch updates the FAQ part in the doc. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93671 --- openmp/docs/SupportAndFAQ.rst | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index 6b8f0d2f72b0d..37c5bcecfcccd 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -52,22 +52,11 @@ All patches go through the regular `LLVM review process Q: How to build an OpenMP offload capable compiler? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To build an *effective* OpenMP offload capable compiler we recommend a two -stage build. The first stage Clang does not require to be offload capable but -all backends that are targeted by OpenMP need to be enabled. 
By default, Clang -will be build with all backends enabled. This initial (stage 1) Clang is used -to create a second Clang compiler that is offload capable as well as the -:ref:`device runtime libraries ` that will be linked into the -offloaded code to provide OpenMP runtime support on the device. - -Generic information about building LLVM is available `here -`__. The CMake options for the -second stage Clang should include: - -- `LIBOMPTARGET_NVPTX_CUDA_COMPILER=$STAGE1/bin/clang` to use the stage one - compiler for the device runtime compilation. -- `LIBOMPTARGET_NVPTX_ENABLE_BCLIB=ON` to enable efficient device runtimes in - bitcode format. +To build an *effective* OpenMP offload capable compiler, only one extra CMake +option, `LLVM_ENABLE_RUNTIMES="OPENMP"`, is needed when building LLVM (Generic +information about building LLVM is available `here `__.). +Make sure all backends that are targeted by OpenMP to be enabled. By default, +Clang will be build with all backends enabled. If your build machine is not the target machine or automatic detection of the available GPUs failed, you should also set: From 85d4a4bcc717a31ff40c4bd979dd6d78beb84b43 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 10:11:31 -0800 Subject: [PATCH 111/378] Revert "Fix memory leak complicated non-type template arguments." This reverts commit ed13d8c66781b50ff007cb089c5905f9bb9e8af2. This is part of 5 commits being reverted due to https://crbug.com/1161059. See bug for repro. --- clang/include/clang/AST/ASTContext.h | 4 ++-- clang/lib/AST/TemplateBase.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index a9bfdb4d5fa5b..0c5d82b3e9aac 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -2818,8 +2818,8 @@ class ASTContext : public RefCountedBase { /// for destruction. template void addDestruction(T *Ptr) const { if (!std::is_trivially_destructible::value) { - auto DestroyPtr = [](void *V) { ((T*)V)->~T(); }; - AddDeallocation(DestroyPtr, (void*)Ptr); + auto DestroyPtr = [](void *V) { static_cast(V)->~T(); }; + AddDeallocation(DestroyPtr, Ptr); } } diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp index a746db315d858..0029c90a0ab65 100644 --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -137,7 +137,6 @@ TemplateArgument::TemplateArgument(const ASTContext &Ctx, QualType Type, else { Value.Kind = UncommonValue; Value.Value = new (Ctx) APValue(V); - Ctx.addDestruction(Value.Value); Value.Type = Type.getAsOpaquePtr(); } } From ab7a60eb4100ab197665b86f682dad0e787a4fed Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 10:12:33 -0800 Subject: [PATCH 112/378] Revert "Fix MSVC "not all control paths return a value" warnings. NFCI." This reverts commit 7e84aa1b81e72d44bcc58ffe1731bfc7abb73ce0. This is part of 5 commits being reverted due to https://crbug.com/1161059. See bug for repro. 
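For context, the warning named in the reverted commit's title typically comes from a
value-returning function that ends in a switch covering every enumerator: MSVC does not
treat such a switch as exhaustive and reports C4715 ("not all control paths return a
value"), while adding a default: case would trip Clang's -Wcovered-switch-default, so
LLVM's usual answer is a trailing llvm_unreachable(). A minimal standalone sketch of that
pattern follows; the Kind and classify names are illustrative only and are not taken from
the patch:

    #include "llvm/Support/ErrorHandling.h"

    enum class Kind { A, B };

    int classify(Kind K) {
      switch (K) {
      case Kind::A:
        return 0;
      case Kind::B:
        return 1;
      }
      // Every enumerator returns above, but MSVC still considers the end of the
      // function reachable. A trailing unreachable marker like this is the kind of
      // addition the reverted fix made (and this revert removes) to silence C4715
      // without introducing a covered-switch default.
      llvm_unreachable("fully covered switch over Kind");
    }
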
--- clang/lib/Sema/SemaTemplate.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 9d31a87012419..74a31c0a1b659 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7637,7 +7637,6 @@ static Expr *BuildExpressionFromNonTypeTemplateArgumentValue( auto *OVE = new (S.Context) OpaqueValueExpr(Loc, T, VK); return ConstantExpr::Create(S.Context, OVE, Val); } - llvm_unreachable("Unhandled APValue::ValueKind enum"); } ExprResult @@ -7667,7 +7666,6 @@ Sema::BuildExpressionFromNonTypeTemplateArgument(const TemplateArgument &Arg, return BuildExpressionFromNonTypeTemplateArgumentValue( *this, Arg.getUncommonValueType(), Arg.getAsUncommonValue(), Loc); } - llvm_unreachable("Unhandled TemplateArgument::ArgKind enum"); } /// Match two template parameters within template parameter lists. From 208023233398a677cc0aacb8153be9801db03af6 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 10:12:37 -0800 Subject: [PATCH 113/378] Revert "[c++20] P1907R1: Support for generalized non-type template arguments of scalar type." This reverts commit 9e08e51a20d0d2b1c5724bb17e969d036fced4cd. This is part of 5 commits being reverted due to https://crbug.com/1161059. See bug for repro. --- clang/include/clang/AST/PropertiesBase.td | 12 -- clang/include/clang/AST/RecursiveASTVisitor.h | 2 - .../clang/AST/TemplateArgumentVisitor.h | 2 - clang/include/clang/AST/TemplateBase.h | 40 +--- .../clang/Basic/DiagnosticSemaKinds.td | 5 + clang/include/clang/Sema/Sema.h | 4 +- .../clang/Serialization/ASTRecordWriter.h | 1 - clang/lib/AST/ASTContext.cpp | 5 - clang/lib/AST/ASTImporter.cpp | 11 - clang/lib/AST/ASTStructuralEquivalence.cpp | 4 - clang/lib/AST/Decl.cpp | 4 - clang/lib/AST/ItaniumMangle.cpp | 43 +--- clang/lib/AST/MicrosoftMangle.cpp | 11 - clang/lib/AST/ODRHash.cpp | 2 - clang/lib/AST/StmtProfile.cpp | 6 - clang/lib/AST/TemplateBase.cpp | 84 +------- clang/lib/AST/TypeLoc.cpp | 1 - clang/lib/CodeGen/CGDebugInfo.cpp | 8 - clang/lib/CodeGen/CGExprConstant.cpp | 10 +- clang/lib/Index/USRGeneration.cpp | 4 - clang/lib/Sema/SemaLookup.cpp | 1 - clang/lib/Sema/SemaOverload.cpp | 4 +- clang/lib/Sema/SemaTemplate.cpp | 200 +++++++----------- clang/lib/Sema/SemaTemplateDeduction.cpp | 94 ++++---- clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 +- clang/lib/Sema/SemaTemplateVariadic.cpp | 2 - clang/lib/Sema/TreeTransform.h | 12 +- clang/lib/Serialization/ASTReader.cpp | 1 - clang/lib/Serialization/ASTWriter.cpp | 1 - clang/test/CodeGenCXX/mangle-ms-templates.cpp | 10 - clang/test/CodeGenCXX/mangle-template.cpp | 40 +--- clang/test/CodeGenCXX/template-arguments.cpp | 81 ------- .../SemaTemplate/temp_arg_nontype_cxx17.cpp | 40 ++-- .../SemaTemplate/temp_arg_nontype_cxx20.cpp | 48 ++--- clang/tools/libclang/CIndex.cpp | 5 - clang/tools/libclang/CXCursor.cpp | 3 - 36 files changed, 188 insertions(+), 621 deletions(-) delete mode 100644 clang/test/CodeGenCXX/template-arguments.cpp diff --git a/clang/include/clang/AST/PropertiesBase.td b/clang/include/clang/AST/PropertiesBase.td index dbe75ab9de194..ba0f237a3bc3c 100644 --- a/clang/include/clang/AST/PropertiesBase.td +++ b/clang/include/clang/AST/PropertiesBase.td @@ -72,7 +72,6 @@ class CountPropertyType : PropertyType { def APInt : PropertyType<"llvm::APInt"> { let PassByReference = 1; } def APSInt : PropertyType<"llvm::APSInt"> { let PassByReference = 1; } -def APValue : PropertyType { let PassByReference = 1; } def ArraySizeModifier : 
EnumPropertyType<"ArrayType::ArraySizeModifier">; def AttrKind : EnumPropertyType<"attr::Kind">; def AutoTypeKeyword : EnumPropertyType; @@ -451,17 +450,6 @@ let Class = PropertyTypeCase in { return TemplateArgument(ctx, value, type); }]>; } -let Class = PropertyTypeCase in { - def : Property<"value", APValue> { - let Read = [{ node.getAsUncommonValue() }]; - } - def : Property<"type", QualType> { - let Read = [{ node.getUncommonValueType() }]; - } - def : Creator<[{ - return TemplateArgument(ctx, type, value); - }]>; -} let Class = PropertyTypeCase in { def : Property<"name", TemplateName> { let Read = [{ node.getAsTemplateOrTemplatePattern() }]; diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 1426e569eabe1..505ea700fd0e0 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -768,7 +768,6 @@ bool RecursiveASTVisitor::TraverseTemplateArgument( case TemplateArgument::Declaration: case TemplateArgument::Integral: case TemplateArgument::NullPtr: - case TemplateArgument::UncommonValue: return true; case TemplateArgument::Type: @@ -802,7 +801,6 @@ bool RecursiveASTVisitor::TraverseTemplateArgumentLoc( case TemplateArgument::Declaration: case TemplateArgument::Integral: case TemplateArgument::NullPtr: - case TemplateArgument::UncommonValue: return true; case TemplateArgument::Type: { diff --git a/clang/include/clang/AST/TemplateArgumentVisitor.h b/clang/include/clang/AST/TemplateArgumentVisitor.h index 8c0da70b25eb9..190aa97adf455 100644 --- a/clang/include/clang/AST/TemplateArgumentVisitor.h +++ b/clang/include/clang/AST/TemplateArgumentVisitor.h @@ -37,7 +37,6 @@ class Base { DISPATCH(Declaration); DISPATCH(NullPtr); DISPATCH(Integral); - DISPATCH(UncommonValue); DISPATCH(Template); DISPATCH(TemplateExpansion); DISPATCH(Expression); @@ -60,7 +59,6 @@ class Base { VISIT_METHOD(Declaration); VISIT_METHOD(NullPtr); VISIT_METHOD(Integral); - VISIT_METHOD(UncommonValue); VISIT_METHOD(Template); VISIT_METHOD(TemplateExpansion); VISIT_METHOD(Expression); diff --git a/clang/include/clang/AST/TemplateBase.h b/clang/include/clang/AST/TemplateBase.h index 9968143e87610..abf873a7ee40f 100644 --- a/clang/include/clang/AST/TemplateBase.h +++ b/clang/include/clang/AST/TemplateBase.h @@ -51,7 +51,6 @@ template <> struct PointerLikeTypeTraits { namespace clang { -class APValue; class ASTContext; class DiagnosticBuilder; class Expr; @@ -83,12 +82,6 @@ class TemplateArgument { /// that was provided for an integral non-type template parameter. Integral, - /// The template argument is a non-type template argument that can't be - /// represented by the special-case Declaration, NullPtr, or Integral - /// forms. These values are only ever produced by constant evaluation, - /// so cannot be dependent. - UncommonValue, - /// The template argument is a template name that was provided for a /// template template parameter. Template, @@ -132,11 +125,6 @@ class TemplateArgument { }; void *Type; }; - struct V { - unsigned Kind; - const APValue *Value; - void *Type; - }; struct A { unsigned Kind; unsigned NumArgs; @@ -154,7 +142,6 @@ class TemplateArgument { union { struct DA DeclArg; struct I Integer; - struct V Value; struct A Args; struct TA TemplateArg; struct TV TypeOrValue; @@ -170,8 +157,9 @@ class TemplateArgument { TypeOrValue.V = reinterpret_cast(T.getAsOpaquePtr()); } - /// Construct a template argument that refers to a (non-dependent) - /// declaration. 
+ /// Construct a template argument that refers to a + /// declaration, which is either an external declaration or a + /// template declaration. TemplateArgument(ValueDecl *D, QualType QT) { assert(D && "Expected decl"); DeclArg.Kind = Declaration; @@ -181,11 +169,7 @@ class TemplateArgument { /// Construct an integral constant template argument. The memory to /// store the value is allocated with Ctx. - TemplateArgument(const ASTContext &Ctx, const llvm::APSInt &Value, - QualType Type); - - /// Construct a template argument from an arbitrary constant value. - TemplateArgument(const ASTContext &Ctx, QualType Type, const APValue &Value); + TemplateArgument(ASTContext &Ctx, const llvm::APSInt &Value, QualType Type); /// Construct an integral constant template argument with the same /// value as Other but a different type. @@ -356,16 +340,6 @@ class TemplateArgument { Integer.Type = T.getAsOpaquePtr(); } - /// Get the value of an UncommonValue. - const APValue &getAsUncommonValue() const { - return *Value.Value; - } - - /// Get the type of an UncommonValue. - QualType getUncommonValueType() const { - return QualType::getFromOpaquePtr(Value.Type); - } - /// If this is a non-type template argument, get its type. Otherwise, /// returns a null QualType. QualType getNonTypeTemplateArgumentType() const; @@ -510,7 +484,6 @@ class TemplateArgumentLoc { assert(Argument.getKind() == TemplateArgument::NullPtr || Argument.getKind() == TemplateArgument::Integral || Argument.getKind() == TemplateArgument::Declaration || - Argument.getKind() == TemplateArgument::UncommonValue || Argument.getKind() == TemplateArgument::Expression); } @@ -569,11 +542,6 @@ class TemplateArgumentLoc { return LocInfo.getAsExpr(); } - Expr *getSourceUncommonValueExpression() const { - assert(Argument.getKind() == TemplateArgument::UncommonValue); - return LocInfo.getAsExpr(); - } - NestedNameSpecifierLoc getTemplateQualifierLoc() const { if (Argument.getKind() != TemplateArgument::Template && Argument.getKind() != TemplateArgument::TemplateExpansion) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ace3064be7bc8..24c2bb57b6f95 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -4656,6 +4656,8 @@ def err_non_type_template_arg_subobject : Error< "non-type template argument refers to subobject '%0'">; def err_non_type_template_arg_addr_label_diff : Error< "template argument / label address difference / what did you expect?">; +def err_non_type_template_arg_unsupported : Error< + "sorry, non-type template argument of type %0 is not yet supported">; def err_template_arg_not_convertible : Error< "non-type template argument of type %0 cannot be converted to a value " "of type %1">; @@ -4707,6 +4709,9 @@ def err_template_arg_not_object_or_func : Error< "non-type template argument does not refer to an object or function">; def err_template_arg_not_pointer_to_member_form : Error< "non-type template argument is not a pointer to member constant">; +def err_template_arg_member_ptr_base_derived_not_supported : Error< + "sorry, non-type template argument of pointer-to-member type %1 that refers " + "to member %q0 of a different class is not supported yet">; def ext_template_arg_extra_parens : ExtWarn< "address non-type template argument cannot be surrounded by parentheses">; def warn_cxx98_compat_template_arg_extra_parens : Warning< diff --git a/clang/include/clang/Sema/Sema.h 
b/clang/include/clang/Sema/Sema.h index 460d0c961c927..2c781eb88415f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7623,8 +7623,8 @@ class Sema final { QualType ParamType, SourceLocation Loc); ExprResult - BuildExpressionFromNonTypeTemplateArgument(const TemplateArgument &Arg, - SourceLocation Loc); + BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, + SourceLocation Loc); /// Enumeration describing how template parameter lists are compared /// for equality. diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h index ff654f417dda9..e362463b23096 100644 --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -166,7 +166,6 @@ class ASTRecordWriter /// Emit an APvalue. void AddAPValue(const APValue &Value); - void writeAPValue(const APValue &Value) { AddAPValue(Value); } /// Emit a reference to an identifier. void AddIdentifierRef(const IdentifierInfo *II) { diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index d396f81188dfd..0190573fe36e2 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -5941,11 +5941,6 @@ ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const { case TemplateArgument::Integral: return TemplateArgument(Arg, getCanonicalType(Arg.getIntegralType())); - case TemplateArgument::UncommonValue: - return TemplateArgument(*this, - getCanonicalType(Arg.getUncommonValueType()), - Arg.getAsUncommonValue()); - case TemplateArgument::Type: return TemplateArgument(getCanonicalType(Arg.getAsType())); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 456e51ad3db4d..54816b721a4af 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -808,17 +808,6 @@ ASTNodeImporter::import(const TemplateArgument &From) { return TemplateArgument(*ToTypeOrErr, /*isNullPtr*/true); } - case TemplateArgument::UncommonValue: { - ExpectedType ToTypeOrErr = import(From.getUncommonValueType()); - if (!ToTypeOrErr) - return ToTypeOrErr.takeError(); - Expected ToValueOrErr = import(From.getAsUncommonValue()); - if (!ToValueOrErr) - return ToValueOrErr.takeError(); - return TemplateArgument(Importer.getToContext(), *ToTypeOrErr, - *ToValueOrErr); - } - case TemplateArgument::Template: { Expected ToTemplateOrErr = import(From.getAsTemplate()); if (!ToTemplateOrErr) diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index f7696bc7c9212..d004e443ae06a 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -565,10 +565,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, return IsStructurallyEquivalent(Context, Arg1.getAsExpr(), Arg2.getAsExpr()); - case TemplateArgument::UncommonValue: - // FIXME: Do we need to customize the comparison? 
- return Arg1.structurallyEquals(Arg2); - case TemplateArgument::Pack: if (Arg1.pack_size() != Arg2.pack_size()) return false; diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index fd10ea56f1712..f0c925f9cdf90 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -342,10 +342,6 @@ LinkageComputer::getLVForTemplateArgumentList(ArrayRef Args, LV.merge(getTypeLinkageAndVisibility(Arg.getNullPtrType())); continue; - case TemplateArgument::UncommonValue: - LV.merge(getLVForValue(Arg.getAsUncommonValue(), computation)); - continue; - case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: if (TemplateDecl *Template = diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 08206c4e1a41c..01deb598a0781 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -4079,28 +4079,10 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { mangleExpression(cast(E)->getSubExpr(), Arity); break; - case Expr::SubstNonTypeTemplateParmExprClass: { - // Mangle a substituted parameter the same way we mangle the template - // argument. - // As proposed in https://github.com/itanium-cxx-abi/cxx-abi/issues/111. - auto *SNTTPE = cast(E); - if (auto *CE = dyn_cast(SNTTPE->getReplacement())) { - // Pull out the constant value and mangle it as a template argument. - QualType ParamType = SNTTPE->getParameterType(Context.getASTContext()); - if (CE->hasAPValueResult()) - mangleValueInTemplateArg(ParamType, CE->getResultAsAPValue(), false, - /*NeedExactType=*/true); - else - mangleValueInTemplateArg(ParamType, CE->getAPValueResult(), false, - /*NeedExactType=*/true); - } else { - // The remaining cases all happen to be substituted with expressions that - // mangle the same as a corresponding template argument anyway. - mangleExpression(cast(E)->getReplacement(), - Arity); - } + case Expr::SubstNonTypeTemplateParmExprClass: + mangleExpression(cast(E)->getReplacement(), + Arity); break; - } case Expr::UserDefinedLiteralClass: // We follow g++'s approach of mangling a UDL as a call to the literal @@ -5057,10 +5039,6 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { mangleNullPointer(A.getNullPtrType()); break; } - case TemplateArgument::UncommonValue: - mangleValueInTemplateArg(A.getUncommonValueType(), A.getAsUncommonValue(), - /*TopLevel=*/true, NeedExactType); - break; case TemplateArgument::Pack: { // ::= J * E Out << 'J'; @@ -5395,20 +5373,7 @@ void CXXNameMangler::mangleValueInTemplateArg(QualType T, const APValue &V, Out << "plcvPcad"; Kind = Offset; } else { - // Clang 11 and before mangled an array subject to array-to-pointer decay - // as if it were the declaration itself. 
- bool IsArrayToPointerDecayMangledAsDecl = false; - if (TopLevel && Ctx.getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver11) { - QualType BType = B.getType(); - IsArrayToPointerDecayMangledAsDecl = - BType->isArrayType() && V.getLValuePath().size() == 1 && - V.getLValuePath()[0].getAsArrayIndex() == 0 && - Ctx.hasSimilarType(T, Ctx.getDecayedType(BType)); - } - - if ((!V.getLValuePath().empty() || V.isLValueOnePastTheEnd()) && - !IsArrayToPointerDecayMangledAsDecl) { + if (!V.getLValuePath().empty() || V.isLValueOnePastTheEnd()) { NotPrimaryExpr(); // A final conversion to the template parameter's type is usually // folded into the 'so' mangling, but we can't do that for 'void*' diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 16e0aa2ae4667..df6c566abc7d7 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1575,17 +1575,6 @@ void MicrosoftCXXNameMangler::mangleTemplateArg(const TemplateDecl *TD, cast(Parm), T); break; } - case TemplateArgument::UncommonValue: - Out << "$"; - if (cast(Parm) - ->getType() - ->getContainedDeducedType()) { - Out << "M"; - mangleType(TA.getNonTypeTemplateArgumentType(), SourceRange(), QMM_Drop); - } - mangleTemplateArgValue(TA.getUncommonValueType(), TA.getAsUncommonValue(), - /*WithScalarType=*/false); - break; case TemplateArgument::Expression: mangleExpression(TA.getAsExpr(), cast(Parm)); break; diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp index 92e3bc27fca07..735bcff8f1137 100644 --- a/clang/lib/AST/ODRHash.cpp +++ b/clang/lib/AST/ODRHash.cpp @@ -169,8 +169,6 @@ void ODRHash::AddTemplateArgument(TemplateArgument TA) { break; case TemplateArgument::NullPtr: case TemplateArgument::Integral: - case TemplateArgument::UncommonValue: - // FIXME: Include a representation of these arguments. break; case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 569e3b06f3b20..de9de6ff463c6 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2208,12 +2208,6 @@ void StmtProfiler::VisitTemplateArgument(const TemplateArgument &Arg) { Arg.getAsIntegral().Profile(ID); break; - case TemplateArgument::UncommonValue: - VisitType(Arg.getUncommonValueType()); - // FIXME: Do we need to recursively decompose this ourselves? - Arg.getAsUncommonValue().Profile(ID); - break; - case TemplateArgument::Expression: Visit(Arg.getAsExpr()); break; diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp index 0029c90a0ab65..44d52c56ffbbc 100644 --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -84,8 +84,8 @@ static void printIntegral(const TemplateArgument &TemplArg, // TemplateArgument Implementation //===----------------------------------------------------------------------===// -TemplateArgument::TemplateArgument(const ASTContext &Ctx, - const llvm::APSInt &Value, QualType Type) { +TemplateArgument::TemplateArgument(ASTContext &Ctx, const llvm::APSInt &Value, + QualType Type) { Integer.Kind = Integral; // Copy the APSInt value into our decomposed form. Integer.BitWidth = Value.getBitWidth(); @@ -103,44 +103,6 @@ TemplateArgument::TemplateArgument(const ASTContext &Ctx, Integer.Type = Type.getAsOpaquePtr(); } -static const ValueDecl *getAsSimpleValueDeclRef(const ASTContext &Ctx, - QualType T, const APValue &V) { - // Pointers to members are relatively easy. 
- if (V.isMemberPointer() && V.getMemberPointerPath().empty()) - return V.getMemberPointerDecl(); - - // We model class non-type template parameters as their template parameter - // object declaration. - if (V.isStruct() || V.isUnion()) - return Ctx.getTemplateParamObjectDecl(T, V); - - // Pointers and references with an empty path use the special 'Declaration' - // representation. - if (V.isLValue() && V.hasLValuePath() && - V.getLValuePath().empty() && !V.isLValueOnePastTheEnd()) - return V.getLValueBase().dyn_cast(); - - // Everything else uses the 'uncommon' representation. - return nullptr; -} - -TemplateArgument::TemplateArgument(const ASTContext &Ctx, QualType Type, - const APValue &V) { - if (Type->isIntegralOrEnumerationType() && V.isInt()) - *this = TemplateArgument(Ctx, V.getInt(), Type); - else if ((V.isLValue() && V.isNullPointer()) || - (V.isMemberPointer() && !V.getMemberPointerDecl())) - *this = TemplateArgument(Type, /*isNullPtr=*/true); - else if (const ValueDecl *VD = getAsSimpleValueDeclRef(Ctx, Type, V)) - // FIXME: The Declaration form should expose a const ValueDecl*. - *this = TemplateArgument(const_cast(VD), Type); - else { - Value.Kind = UncommonValue; - Value.Value = new (Ctx) APValue(V); - Value.Type = Type.getAsOpaquePtr(); - } -} - TemplateArgument TemplateArgument::CreatePackCopy(ASTContext &Context, ArrayRef Args) { @@ -172,7 +134,6 @@ TemplateArgumentDependence TemplateArgument::getDependence() const { case NullPtr: case Integral: case Declaration: - case UncommonValue: return TemplateArgumentDependence::None; case Expression: @@ -204,7 +165,6 @@ bool TemplateArgument::isPackExpansion() const { case Null: case Declaration: case Integral: - case UncommonValue: case Pack: case Template: case NullPtr: @@ -255,9 +215,6 @@ QualType TemplateArgument::getNonTypeTemplateArgumentType() const { case TemplateArgument::NullPtr: return getNullPtrType(); - - case TemplateArgument::UncommonValue: - return getUncommonValueType(); } llvm_unreachable("Invalid TemplateArgument Kind!"); @@ -302,13 +259,8 @@ void TemplateArgument::Profile(llvm::FoldingSetNodeID &ID, } case Integral: - getIntegralType().Profile(ID); getAsIntegral().Profile(ID); - break; - - case UncommonValue: - getUncommonValueType().Profile(ID); - getAsUncommonValue().Profile(ID); + getIntegralType().Profile(ID); break; case Expression: @@ -344,16 +296,6 @@ bool TemplateArgument::structurallyEquals(const TemplateArgument &Other) const { return getIntegralType() == Other.getIntegralType() && getAsIntegral() == Other.getAsIntegral(); - case UncommonValue: { - if (getUncommonValueType() != Other.getUncommonValueType()) - return false; - - llvm::FoldingSetNodeID A, B; - getAsUncommonValue().Profile(A); - Other.getAsUncommonValue().Profile(B); - return A == B; - } - case Pack: if (Args.NumArgs != Other.Args.NumArgs) return false; for (unsigned I = 0, E = Args.NumArgs; I != E; ++I) @@ -380,7 +322,6 @@ TemplateArgument TemplateArgument::getPackExpansionPattern() const { case Declaration: case Integral: - case UncommonValue: case Pack: case Null: case Template: @@ -420,10 +361,6 @@ void TemplateArgument::print(const PrintingPolicy &Policy, break; } - case UncommonValue: - getAsUncommonValue().printPretty(Out, Policy, getUncommonValueType()); - break; - case NullPtr: Out << "nullptr"; break; @@ -506,9 +443,6 @@ SourceRange TemplateArgumentLoc::getSourceRange() const { case TemplateArgument::Integral: return getSourceIntegralExpression()->getSourceRange(); - case TemplateArgument::UncommonValue: - return 
getSourceUncommonValueExpression()->getSourceRange(); - case TemplateArgument::Pack: case TemplateArgument::Null: return SourceRange(); @@ -537,18 +471,6 @@ static const T &DiagTemplateArg(const T &DB, const TemplateArgument &Arg) { case TemplateArgument::Integral: return DB << Arg.getAsIntegral().toString(10); - case TemplateArgument::UncommonValue: { - // FIXME: We're guessing at LangOptions! - SmallString<32> Str; - llvm::raw_svector_ostream OS(Str); - LangOptions LangOpts; - LangOpts.CPlusPlus = true; - PrintingPolicy Policy(LangOpts); - Arg.getAsUncommonValue().printPretty(OS, Policy, - Arg.getUncommonValueType()); - return DB << OS.str(); - } - case TemplateArgument::Template: return DB << Arg.getAsTemplate(); diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 438b6950890b9..222b1abac5105 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -562,7 +562,6 @@ void TemplateSpecializationTypeLoc::initializeArgLocs(ASTContext &Context, case TemplateArgument::Integral: case TemplateArgument::Declaration: case TemplateArgument::NullPtr: - case TemplateArgument::UncommonValue: ArgInfos[i] = TemplateArgumentLocInfo(); break; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ae50f80e6fb5d..7b20d43b0f17e 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1953,14 +1953,6 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, TemplateParams.push_back(DBuilder.createTemplateValueParameter( TheCU, Name, TTy, defaultParameter, V)); } break; - case TemplateArgument::UncommonValue: { - QualType T = TA.getUncommonValueType(); - llvm::DIType *TTy = getOrCreateType(T, Unit); - llvm::Constant *V = ConstantEmitter(CGM).emitAbstract( - SourceLocation(), TA.getAsUncommonValue(), T); - TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, defaultParameter, V)); - } break; case TemplateArgument::Template: TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( TheCU, Name, nullptr, diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 840541a4af20a..ca1d3a937fa87 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1366,11 +1366,11 @@ llvm::Constant *ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) { if (!CE->hasAPValueResult()) return nullptr; const Expr *Inner = CE->getSubExpr()->IgnoreImplicit(); - QualType RetType = Inner->getType(); - if (Inner->isLValue()) - RetType = CGF->getContext().getLValueReferenceType(RetType); - else if (Inner->isXValue()) - RetType = CGF->getContext().getRValueReferenceType(RetType); + QualType RetType; + if (auto *Call = dyn_cast(Inner)) + RetType = Call->getCallReturnType(CGF->getContext()); + else if (auto *Ctor = dyn_cast(Inner)) + RetType = Ctor->getType(); llvm::Constant *Res = emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType); return Res; diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index 9ada9ac8c2098..abaeb1a4232f2 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -983,10 +983,6 @@ void USRGenerator::VisitTemplateArgument(const TemplateArgument &Arg) { VisitType(Arg.getIntegralType()); Out << Arg.getAsIntegral(); break; - - case TemplateArgument::UncommonValue: - // FIXME: Visit value. 
- break; } } diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 8885e4191987e..16dd8f5105961 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -2657,7 +2657,6 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, case TemplateArgument::Integral: case TemplateArgument::Expression: case TemplateArgument::NullPtr: - case TemplateArgument::UncommonValue: // [Note: non-type template arguments do not contribute to the set of // associated namespaces. ] break; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 5f4d5b9632e05..ac52612ea3b0c 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -5774,9 +5774,7 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, if (Notes.empty()) { // It's a constant expression. - Expr *E = Result.get(); - if (!isa(E)) - E = ConstantExpr::Create(S.Context, Result.get(), Value); + Expr *E = ConstantExpr::Create(S.Context, Result.get(), Value); if (ReturnPreNarrowingValue) Value = std::move(PreNarrowingValue); return E; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 74a31c0a1b659..7ebd9be831ad5 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -4086,7 +4086,6 @@ static bool isTemplateArgumentTemplateParameter( case TemplateArgument::NullPtr: case TemplateArgument::Integral: case TemplateArgument::Declaration: - case TemplateArgument::UncommonValue: case TemplateArgument::Pack: case TemplateArgument::TemplateExpansion: return false; @@ -5420,7 +5419,6 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, case TemplateArgument::Declaration: case TemplateArgument::Integral: - case TemplateArgument::UncommonValue: case TemplateArgument::NullPtr: // We've already checked this template argument, so just copy // it to the list of converted arguments. @@ -5567,10 +5565,11 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, return true; case TemplateArgument::Declaration: + llvm_unreachable("Declaration argument with template template parameter"); case TemplateArgument::Integral: - case TemplateArgument::UncommonValue: + llvm_unreachable("Integral argument with template template parameter"); case TemplateArgument::NullPtr: - llvm_unreachable("non-type argument with template template parameter"); + llvm_unreachable("Null pointer argument with template template parameter"); case TemplateArgument::Pack: llvm_unreachable("Caller must expand template argument packs"); @@ -6937,9 +6936,37 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, return ArgResult; } - // Prior to C++20, enforce restrictions on possible template argument - // values. - if (!getLangOpts().CPlusPlus20 && Value.isLValue()) { + // Convert the APValue to a TemplateArgument. + switch (Value.getKind()) { + case APValue::None: + assert(ParamType->isNullPtrType()); + Converted = TemplateArgument(CanonParamType, /*isNullPtr*/true); + break; + case APValue::Indeterminate: + llvm_unreachable("result of constant evaluation should be initialized"); + break; + case APValue::Int: + assert(ParamType->isIntegralOrEnumerationType()); + Converted = TemplateArgument(Context, Value.getInt(), CanonParamType); + break; + case APValue::MemberPointer: { + assert(ParamType->isMemberPointerType()); + + // FIXME: We need TemplateArgument representation and mangling for these. 
+ if (!Value.getMemberPointerPath().empty()) { + Diag(Arg->getBeginLoc(), + diag::err_template_arg_member_ptr_base_derived_not_supported) + << Value.getMemberPointerDecl() << ParamType + << Arg->getSourceRange(); + return ExprError(); + } + + auto *VD = const_cast(Value.getMemberPointerDecl()); + Converted = VD ? TemplateArgument(VD, CanonParamType) + : TemplateArgument(CanonParamType, /*isNullPtr*/true); + break; + } + case APValue::LValue: { // For a non-type template-parameter of pointer or reference type, // the value of the constant expression shall not refer to assert(ParamType->isPointerType() || ParamType->isReferenceType() || @@ -6955,7 +6982,8 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, << Arg->getSourceRange(); return ExprError(); } - // -- a subobject [until C++20] + // -- a subobject + // FIXME: Until C++20 if (Value.hasLValuePath() && Value.getLValuePath().size() == 1 && VD && VD->getType()->isArrayType() && Value.getLValuePath()[0].getAsArrayIndex() == 0 && @@ -6973,12 +7001,29 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, "null reference should not be a constant expression"); assert((!VD || !ParamType->isNullPtrType()) && "non-null value of type nullptr_t?"); + Converted = VD ? TemplateArgument(VD, CanonParamType) + : TemplateArgument(CanonParamType, /*isNullPtr*/true); + break; } - - if (Value.isAddrLabelDiff()) + case APValue::Struct: + case APValue::Union: + // Get or create the corresponding template parameter object. + Converted = TemplateArgument( + Context.getTemplateParamObjectDecl(CanonParamType, Value), + CanonParamType); + break; + case APValue::AddrLabelDiff: return Diag(StartLoc, diag::err_non_type_template_arg_addr_label_diff); + case APValue::FixedPoint: + case APValue::Float: + case APValue::ComplexInt: + case APValue::ComplexFloat: + case APValue::Vector: + case APValue::Array: + return Diag(StartLoc, diag::err_non_type_template_arg_unsupported) + << ParamType; + } - Converted = TemplateArgument(Context, CanonParamType, Value); return ArgResult.get(); } @@ -7516,9 +7561,12 @@ Sema::BuildExpressionFromDeclTemplateArgument(const TemplateArgument &Arg, /// This routine takes care of the mapping from an integral template /// argument (which may have any integral type) to the appropriate /// literal value. -static Expr *BuildExpressionFromIntegralTemplateArgumentValue( - Sema &S, QualType OrigT, const llvm::APSInt &Int, SourceLocation Loc) { - assert(OrigT->isIntegralOrEnumerationType()); +ExprResult +Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, + SourceLocation Loc) { + assert(Arg.getKind() == TemplateArgument::Integral && + "Operation is only valid for integral template arguments"); + QualType OrigT = Arg.getIntegralType(); // If this is an enum type that we're instantiating, we need to use an integer // type the same size as the enumerator. 
We don't want to build an @@ -7534,7 +7582,7 @@ static Expr *BuildExpressionFromIntegralTemplateArgumentValue( CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; - else if (T->isChar8Type() && S.getLangOpts().Char8) + else if (T->isChar8Type() && getLangOpts().Char8) Kind = CharacterLiteral::UTF8; else if (T->isChar16Type()) Kind = CharacterLiteral::UTF16; @@ -7543,131 +7591,29 @@ static Expr *BuildExpressionFromIntegralTemplateArgumentValue( else Kind = CharacterLiteral::Ascii; - E = new (S.Context) CharacterLiteral(Int.getZExtValue(), Kind, T, Loc); + E = new (Context) CharacterLiteral(Arg.getAsIntegral().getZExtValue(), + Kind, T, Loc); } else if (T->isBooleanType()) { - E = new (S.Context) CXXBoolLiteralExpr(Int.getBoolValue(), T, Loc); + E = new (Context) CXXBoolLiteralExpr(Arg.getAsIntegral().getBoolValue(), + T, Loc); + } else if (T->isNullPtrType()) { + E = new (Context) CXXNullPtrLiteralExpr(Context.NullPtrTy, Loc); } else { - E = IntegerLiteral::Create(S.Context, Int, T, Loc); + E = IntegerLiteral::Create(Context, Arg.getAsIntegral(), T, Loc); } if (OrigT->isEnumeralType()) { // FIXME: This is a hack. We need a better way to handle substituted // non-type template parameters. - E = CStyleCastExpr::Create(S.Context, OrigT, VK_RValue, CK_IntegralCast, E, - nullptr, S.CurFPFeatureOverrides(), - S.Context.getTrivialTypeSourceInfo(OrigT, Loc), + E = CStyleCastExpr::Create(Context, OrigT, VK_RValue, CK_IntegralCast, E, + nullptr, CurFPFeatureOverrides(), + Context.getTrivialTypeSourceInfo(OrigT, Loc), Loc, Loc); } return E; } -static Expr *BuildExpressionFromNonTypeTemplateArgumentValue( - Sema &S, QualType T, const APValue &Val, SourceLocation Loc) { - auto MakeInitList = [&] (ArrayRef Elts) -> Expr* { - auto *ILE = new (S.Context) InitListExpr(S.Context, Loc, Elts, Loc); - ILE->setType(T); - return ILE; - }; - - switch (Val.getKind()) { - case APValue::AddrLabelDiff: - // This cannot occur in a template argument at all. - case APValue::Array: - case APValue::Struct: - case APValue::Union: - // These can only occur within a template parameter object, which is - // represented as a TemplateArgument::Declaration. 
- llvm_unreachable("unexpected template argument value"); - - case APValue::Int: - return BuildExpressionFromIntegralTemplateArgumentValue(S, T, Val.getInt(), - Loc); - - case APValue::Float: - return FloatingLiteral::Create(S.Context, Val.getFloat(), /*IsExact=*/true, - T, Loc); - - case APValue::FixedPoint: - return FixedPointLiteral::CreateFromRawInt( - S.Context, Val.getFixedPoint().getValue(), T, Loc, - Val.getFixedPoint().getScale()); - - case APValue::ComplexInt: { - QualType ElemT = T->castAs()->getElementType(); - return MakeInitList({BuildExpressionFromIntegralTemplateArgumentValue( - S, ElemT, Val.getComplexIntReal(), Loc), - BuildExpressionFromIntegralTemplateArgumentValue( - S, ElemT, Val.getComplexIntImag(), Loc)}); - } - - case APValue::ComplexFloat: { - QualType ElemT = T->castAs()->getElementType(); - return MakeInitList( - {FloatingLiteral::Create(S.Context, Val.getComplexFloatReal(), true, - ElemT, Loc), - FloatingLiteral::Create(S.Context, Val.getComplexFloatImag(), true, - ElemT, Loc)}); - } - - case APValue::Vector: { - QualType ElemT = T->castAs()->getElementType(); - llvm::SmallVector Elts; - for (unsigned I = 0, N = Val.getVectorLength(); I != N; ++I) - Elts.push_back(BuildExpressionFromNonTypeTemplateArgumentValue( - S, ElemT, Val.getVectorElt(I), Loc)); - return MakeInitList(Elts); - } - - case APValue::None: - case APValue::Indeterminate: - // FIXME: Are these values possible? - case APValue::LValue: - case APValue::MemberPointer: - // There isn't necessarily a valid equivalent source-level syntax for - // these; in particular, a naive lowering might violate access control. - // So for now we lower to a ConstantExpr holding the value, wrapped around - // an OpaqueValueExpr. - // FIXME: We should have a better representation for this. - ExprValueKind VK = VK_RValue; - if (T->isReferenceType()) { - T = T->getPointeeType(); - VK = VK_LValue; - } - auto *OVE = new (S.Context) OpaqueValueExpr(Loc, T, VK); - return ConstantExpr::Create(S.Context, OVE, Val); - } -} - -ExprResult -Sema::BuildExpressionFromNonTypeTemplateArgument(const TemplateArgument &Arg, - SourceLocation Loc) { - switch (Arg.getKind()) { - case TemplateArgument::Null: - case TemplateArgument::Type: - case TemplateArgument::Template: - case TemplateArgument::TemplateExpansion: - case TemplateArgument::Pack: - llvm_unreachable("not a non-type template argument"); - - case TemplateArgument::Expression: - return Arg.getAsExpr(); - - case TemplateArgument::NullPtr: - case TemplateArgument::Declaration: - return BuildExpressionFromDeclTemplateArgument( - Arg, Arg.getNonTypeTemplateArgumentType(), Loc); - - case TemplateArgument::Integral: - return BuildExpressionFromIntegralTemplateArgumentValue( - *this, Arg.getIntegralType(), Arg.getAsIntegral(), Loc); - - case TemplateArgument::UncommonValue: - return BuildExpressionFromNonTypeTemplateArgumentValue( - *this, Arg.getUncommonValueType(), Arg.getAsUncommonValue(), Loc); - } -} - /// Match two template parameters within template parameter lists. static bool MatchTemplateParameterKind(Sema &S, NamedDecl *New, NamedDecl *Old, bool Complain, diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 23d4056ce254c..4a3b64cf54250 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -276,16 +276,6 @@ checkDeducedTemplateArguments(ASTContext &Context, // All other combinations are incompatible. 
return DeducedTemplateArgument(); - case TemplateArgument::UncommonValue: - // If we deduced a value and a dependent expression, keep the value. - if (Y.getKind() == TemplateArgument::Expression || - (Y.getKind() == TemplateArgument::UncommonValue && - X.structurallyEquals(Y))) - return X; - - // All other combinations are incompatible. - return DeducedTemplateArgument(); - case TemplateArgument::Template: if (Y.getKind() == TemplateArgument::Template && Context.hasSameTemplateName(X.getAsTemplate(), Y.getAsTemplate())) @@ -2371,18 +2361,20 @@ DeduceTemplateArguments(Sema &S, return Sema::TDK_NonDeducedMismatch; case TemplateArgument::Integral: - if (Arg.getKind() == TemplateArgument::Integral && - hasSameExtendedValue(Param.getAsIntegral(), Arg.getAsIntegral())) - return Sema::TDK_Success; + if (Arg.getKind() == TemplateArgument::Integral) { + if (hasSameExtendedValue(Param.getAsIntegral(), Arg.getAsIntegral())) + return Sema::TDK_Success; - Info.FirstArg = Param; - Info.SecondArg = Arg; - return Sema::TDK_NonDeducedMismatch; + Info.FirstArg = Param; + Info.SecondArg = Arg; + return Sema::TDK_NonDeducedMismatch; + } - case TemplateArgument::UncommonValue: - if (Arg.getKind() == TemplateArgument::UncommonValue && - Arg.structurallyEquals(Param)) - return Sema::TDK_Success; + if (Arg.getKind() == TemplateArgument::Expression) { + Info.FirstArg = Param; + Info.SecondArg = Arg; + return Sema::TDK_NonDeducedMismatch; + } Info.FirstArg = Param; Info.SecondArg = Arg; @@ -2391,34 +2383,28 @@ DeduceTemplateArguments(Sema &S, case TemplateArgument::Expression: if (const NonTypeTemplateParmDecl *NTTP = getDeducedParameterFromExpr(Info, Param.getAsExpr())) { - switch (Arg.getKind()) { - case TemplateArgument::Integral: - case TemplateArgument::Expression: - case TemplateArgument::UncommonValue: - return DeduceNonTypeTemplateArgument( - S, TemplateParams, NTTP, DeducedTemplateArgument(Arg), - Arg.getNonTypeTemplateArgumentType(), Info, Deduced); - - case TemplateArgument::NullPtr: - return DeduceNullPtrTemplateArgument( - S, TemplateParams, NTTP, Arg.getNullPtrType(), Info, Deduced); - + if (Arg.getKind() == TemplateArgument::Integral) + return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, + Arg.getAsIntegral(), + Arg.getIntegralType(), + /*ArrayBound=*/false, + Info, Deduced); + if (Arg.getKind() == TemplateArgument::NullPtr) + return DeduceNullPtrTemplateArgument(S, TemplateParams, NTTP, + Arg.getNullPtrType(), + Info, Deduced); + if (Arg.getKind() == TemplateArgument::Expression) + return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, + Arg.getAsExpr(), Info, Deduced); + if (Arg.getKind() == TemplateArgument::Declaration) + return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, + Arg.getAsDecl(), + Arg.getParamTypeForDecl(), + Info, Deduced); - case TemplateArgument::Declaration: - return DeduceNonTypeTemplateArgument( - S, TemplateParams, NTTP, Arg.getAsDecl(), Arg.getParamTypeForDecl(), - Info, Deduced); - - case TemplateArgument::Null: - case TemplateArgument::Type: - case TemplateArgument::Template: - case TemplateArgument::TemplateExpansion: - case TemplateArgument::Pack: - Info.FirstArg = Param; - Info.SecondArg = Arg; - return Sema::TDK_NonDeducedMismatch; - } - llvm_unreachable("Unknown template argument kind"); + Info.FirstArg = Param; + Info.SecondArg = Arg; + return Sema::TDK_NonDeducedMismatch; } // Can't deduce anything, but that's okay. 
@@ -2606,9 +2592,6 @@ static bool isSameTemplateArg(ASTContext &Context, case TemplateArgument::Integral: return hasSameExtendedValue(X.getAsIntegral(), Y.getAsIntegral()); - case TemplateArgument::UncommonValue: - return X.structurallyEquals(Y); - case TemplateArgument::Expression: { llvm::FoldingSetNodeID XID, YID; X.getAsExpr()->Profile(XID, Context, true); @@ -2674,9 +2657,9 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg, E); } - case TemplateArgument::Integral: - case TemplateArgument::UncommonValue: { - Expr *E = BuildExpressionFromNonTypeTemplateArgument(Arg, Loc).get(); + case TemplateArgument::Integral: { + Expr *E = + BuildExpressionFromIntegralTemplateArgument(Arg, Loc).getAs(); return TemplateArgumentLoc(TemplateArgument(E), E); } @@ -6120,8 +6103,11 @@ MarkUsedTemplateParameters(ASTContext &Ctx, case TemplateArgument::Null: case TemplateArgument::Integral: case TemplateArgument::Declaration: + break; + case TemplateArgument::NullPtr: - case TemplateArgument::UncommonValue: + MarkUsedTemplateParameters(Ctx, TemplateArg.getNullPtrType(), OnlyDeduced, + Depth, Used); break; case TemplateArgument::Type: diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index d04ae36360a6b..cbf4fb1de4656 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1556,18 +1556,16 @@ ExprResult TemplateInstantiator::transformNonTypeTemplateParmRef( VD = nullptr; } - QualType paramType = arg.getNonTypeTemplateArgumentType(); + QualType paramType = VD ? arg.getParamTypeForDecl() : arg.getNullPtrType(); assert(!paramType.isNull() && "type substitution failed for param type"); assert(!paramType->isDependentType() && "param type still dependent"); result = SemaRef.BuildExpressionFromDeclTemplateArgument(arg, paramType, loc); refParam = paramType->isReferenceType(); } else { - QualType paramType = arg.getNonTypeTemplateArgumentType(); - result = SemaRef.BuildExpressionFromNonTypeTemplateArgument(arg, loc); - refParam = paramType->isReferenceType(); + result = SemaRef.BuildExpressionFromIntegralTemplateArgument(arg, loc); assert(result.isInvalid() || SemaRef.Context.hasSameType(result.get()->getType(), - paramType.getNonReferenceType())); + arg.getIntegralType())); } if (result.isInvalid()) diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 3c6365a075f48..1951aec3d17d9 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -1103,7 +1103,6 @@ Sema::getTemplateArgumentPackExpansionPattern( case TemplateArgument::NullPtr: case TemplateArgument::Template: case TemplateArgument::Integral: - case TemplateArgument::UncommonValue: case TemplateArgument::Pack: case TemplateArgument::Null: return TemplateArgumentLoc(); @@ -1154,7 +1153,6 @@ Optional Sema::getFullyPackExpandedSize(TemplateArgument Arg) { case TemplateArgument::NullPtr: case TemplateArgument::TemplateExpansion: case TemplateArgument::Integral: - case TemplateArgument::UncommonValue: case TemplateArgument::Pack: case TemplateArgument::Null: return None; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 1695e26c7d5f5..2cc8b9c8324f4 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3557,7 +3557,6 @@ class TreeTransform { case TemplateArgument::Null: case TemplateArgument::Integral: case TemplateArgument::Declaration: - case TemplateArgument::UncommonValue: case 
TemplateArgument::Pack: case TemplateArgument::TemplateExpansion: case TemplateArgument::NullPtr: @@ -4230,8 +4229,7 @@ bool TreeTransform::TransformTemplateArgument( case TemplateArgument::Integral: case TemplateArgument::NullPtr: - case TemplateArgument::Declaration: - case TemplateArgument::UncommonValue: { + case TemplateArgument::Declaration: { // Transform a resolved template argument straight to a resolved template // argument. We get here when substituting into an already-substituted // template type argument during concept satisfaction checking. @@ -4258,15 +4256,9 @@ bool TreeTransform::TransformTemplateArgument( else if (Arg.getKind() == TemplateArgument::NullPtr) Output = TemplateArgumentLoc(TemplateArgument(NewT, /*IsNullPtr=*/true), TemplateArgumentLocInfo()); - else if (Arg.getKind() == TemplateArgument::Declaration) + else Output = TemplateArgumentLoc(TemplateArgument(NewD, NewT), TemplateArgumentLocInfo()); - else if (Arg.getKind() == TemplateArgument::UncommonValue) - Output = TemplateArgumentLoc( - TemplateArgument(getSema().Context, NewT, Arg.getAsUncommonValue()), - TemplateArgumentLocInfo()); - else - llvm_unreachable("unexpected template argument kind"); return false; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 22533527fc610..b48b23ce4a51f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7100,7 +7100,6 @@ ASTRecordReader::readTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind) { case TemplateArgument::Integral: case TemplateArgument::Declaration: case TemplateArgument::NullPtr: - case TemplateArgument::UncommonValue: case TemplateArgument::Pack: // FIXME: Is this right? return TemplateArgumentLocInfo(); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 87160b26d4210..3a281e492fde7 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5350,7 +5350,6 @@ void ASTRecordWriter::AddTemplateArgumentLocInfo( case TemplateArgument::Integral: case TemplateArgument::Declaration: case TemplateArgument::NullPtr: - case TemplateArgument::UncommonValue: case TemplateArgument::Pack: // FIXME: Is this right? 
break; diff --git a/clang/test/CodeGenCXX/mangle-ms-templates.cpp b/clang/test/CodeGenCXX/mangle-ms-templates.cpp index c9149a473b6fa..7402d367ae3e9 100644 --- a/clang/test/CodeGenCXX/mangle-ms-templates.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-templates.cpp @@ -2,7 +2,6 @@ // RUN: %clang_cc1 -std=c++11 -fms-compatibility-version=19 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-win32 | FileCheck -check-prefix X64 %s // RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=i386-pc-win32 | FileCheck %s // RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-win32 | FileCheck -check-prefix X64 %s -// RUN: %clang_cc1 -std=c++20 -fms-compatibility-version=19 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-win32 | FileCheck -check-prefix CXX20-X64 %s template class Class { @@ -328,12 +327,3 @@ void fun_uint128(UInt128<(unsigned __int128)-1>) {} // X64: define {{.*}} @"?fun_uint128@@YAXU?$UInt128@$0DPPPPPPPPPPPPPPPAAAAAAAAAAAAAAAB@@@@Z"( void fun_uint128(UInt128<(unsigned __int128)9223372036854775807 * (unsigned __int128)9223372036854775807>) {} #endif - -#if __cplusplus >= 202002L -template struct Float {}; -// CXX20-X64: define {{.*}} @"?f@@YAXU?$Float@$ADPIAAAAA@@@@Z"( -void f(Float<1.0f>) {} -template struct Auto {}; -// CXX20-X64: define {{.*}} @"?f@@YAXU?$Auto@$MMADPIAAAAA@@@@Z"( -void f(Auto<1.0f>) {} -#endif diff --git a/clang/test/CodeGenCXX/mangle-template.cpp b/clang/test/CodeGenCXX/mangle-template.cpp index 8326bf658f064..40688de7e12e8 100644 --- a/clang/test/CodeGenCXX/mangle-template.cpp +++ b/clang/test/CodeGenCXX/mangle-template.cpp @@ -226,16 +226,6 @@ namespace test16 { namespace cxx20 { template struct A {}; template struct B {}; - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AILf3f800000EEE( - void f(A<1.0f>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AILd3ff0000000000000EEE( - void f(A<1.0>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AILe3fff8000000000000000EEE( - void f(A<1.0l>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXtlCiLi0ELi1EEEEE( - void f(A<1i>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXtlCdLd0000000000000000ELd3ff0000000000000EEEEE( - void f(A<1.0i>) {} int x; // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXadL_ZNS_1xEEEEE( @@ -255,24 +245,7 @@ namespace cxx20 { // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIPKvXadL_ZNS_1xEEEEE( void f(B) {} - struct Q { int x; } q; - - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXadsoiL_ZNS_1qEEEEEE( - void f(A<&q.x>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIPiXadsoiL_ZNS_1qEEEEEE( - void f(B) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXadsoKiL_ZNS_1qEEEEEE( - void f(A<(const int*)&q.x>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIPKiXadsoS1_L_ZNS_1qEEEEEE - void f(B) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXcvPvadsoiL_ZNS_1qEEEEEE( - void f(A<(void*)&q.x>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIPvXadsoiL_ZNS_1qEEEEEE( - void f(B) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXcvPKvadsoiL_ZNS_1qEEEEEE( - void f(A<(const void*)&q.x>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIPKvXadsoiL_ZNS_1qEEEEEE( - void f(B) {} + struct Q { int x; }; // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXadL_ZNS_1Q1xEEEEE( void f(A<&Q::x>) {} @@ -282,17 +255,6 @@ namespace cxx20 { void f(A<(const int Q::*)&Q::x>) {} // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIMNS_1QEKiXadL_ZNS1_1xEEEEE( void 
f(B) {} - - struct R : Q {}; - - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXmcMNS_1REiadL_ZNS_1Q1xEEEEEE( - void f(A<(int R::*)&Q::x>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIMNS_1REiXmcS2_adL_ZNS_1Q1xEEEEEE( - void f(B) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1AIXmcMNS_1REKiadL_ZNS_1Q1xEEEEEE( - void f(A<(const int R::*)&Q::x>) {} - // CXX20: define {{.*}} @_ZN5cxx201fENS_1BIMNS_1REKiXmcS3_adL_ZNS_1Q1xEEEEEE( - void f(B) {} } #endif diff --git a/clang/test/CodeGenCXX/template-arguments.cpp b/clang/test/CodeGenCXX/template-arguments.cpp deleted file mode 100644 index fd6f469e88fcc..0000000000000 --- a/clang/test/CodeGenCXX/template-arguments.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// RUN: %clang_cc1 -std=c++20 %s -emit-llvm -o - -triple x86_64-linux -DCONSTEXPR= | FileCheck %s -// RUN: %clang_cc1 -std=c++20 %s -emit-llvm -o - -triple x86_64-linux -DCONSTEXPR=constexpr | FileCheck %s --check-prefix=CONST - -template CONSTEXPR T id(T v) { return v; } -template auto value = id(V); - -// CHECK: call {{.*}} @_Z2idIiET_S0_(i32 1) -// CONST: @_Z5valueILi1EE = weak_odr {{.*}} i32 1, -template int value<1>; - -// CHECK: call {{.*}} @_Z2idIyET_S0_(i64 -1) -// CONST: @_Z5valueILy18446744073709551615EE = weak_odr {{.*}} i64 -1, -template unsigned long long value<-1ULL>; - -// CHECK: call {{.*}} @_Z2idIfET_S0_(float 1.000000e+00) -// CONST: @_Z5valueILf3f800000EE = weak_odr {{.*}} float 1.000000e+00, -template float value<1.0f>; -// CHECK: call {{.*}} @_Z2idIdET_S0_(double 1.000000e+00) -// CONST: @_Z5valueILd3ff0000000000000EE = weak_odr {{.*}} double 1.000000e+00, -template double value<1.0>; - -int n; -// CHECK: call {{.*}} @_Z2idIPiET_S1_(i32* @n) -// CONST: @_Z5valueIXadL_Z1nEEE = weak_odr {{.*}} i32* @n, -template int *value<&n>; - -struct A { int a[3]; } a; -// CHECK: call {{.*}} @_Z2idIPiET_S1_(i32* getelementptr inbounds (%struct.A, %struct.A* @a, i32 0, i32 0, i32 0)) -// CONST: @_Z5valueIXadsoiL_Z1aEEEE = weak_odr {{.*}} i32* getelementptr inbounds (%struct.A, %struct.A* @a, i32 0, i32 0, i32 0), -template int *value<&a.a[0]>; -// CHECK: call {{.*}} @_Z2idIPiET_S1_(i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 4) to i32*)) -// CONST: @_Z5valueIXadsoiL_Z1aE4EEE = weak_odr {{.*}} i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 4) to i32*), -template int *value<&a.a[1]>; -// CHECK: call {{.*}} @_Z2idIPiET_S1_(i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 8) to i32*)) -// CONST: @_Z5valueIXadsoiL_Z1aE8EEE = weak_odr {{.*}} i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 8) to i32*), -template int *value<&a.a[2]>; -// CHECK: call {{.*}} @_Z2idIPiET_S1_(i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 12) to i32*)) -// CONST: @_Z5valueIXadsoiL_Z1aE12pEEE = weak_odr {{.*}} i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 12) to i32*), -template int *value<&a.a[3]>; - -struct B { int x, y; }; -// CHECK: call {{.*}} @_Z2idIM1BiET_S2_(i64 0) -// CONST: @_Z5valueIXadL_ZN1B1xEEEE = weak_odr {{.*}} i64 0, -template int B::*value<&B::x>; -// CHECK: call {{.*}} @_Z2idIM1BiET_S2_(i64 4) -// CONST: @_Z5valueIXadL_ZN1B1yEEEE = weak_odr {{.*}} i64 4, -template int B::*value<&B::y>; - -struct C : A, B { int z; }; -// CHECK: call {{.*}} @_Z2idIM1CiET_S2_(i64 12) -// CONST: @_Z5valueIXmcM1CiadL_ZN1B1xEE12EEE = weak_odr {{.*}} i64 12, -template int C::*value<(int C::*)&B::x>; -// CHECK: call {{.*}} @_Z2idIM1BiET_S2_(i64 8) -// 
CONST: @_Z5valueIXmcM1BiadL_ZN1C1zEEn12EEE = weak_odr {{.*}} i64 8, -template int B::*value<(int B::*)&C::z>; - -// CHECK: store i32 1, i32* -// CHECK: store i32 2, i32* -// CHECK: bitcast { i32, i32 }* %{{.*}} to i64* -// CHECK: load i64, -// CHECK: call {{.*}} @_Z2idICiET_S1_(i64 % -// CONST: @_Z5valueIXtlCiLi1ELi2EEEE = weak_odr {{.*}} { i32, i32 } { i32 1, i32 2 }, -template _Complex int value<1 + 2j>; - -// CHECK: store float 1.000000e+00, float* -// CHECK: store float 2.000000e+00, float* -// CHECK: bitcast { float, float }* %{{.*}} to <2 x float>* -// CHECK: load <2 x float>, -// CHECK: call {{.*}} @_Z2idICfET_S1_(<2 x float> % -// CONST: @_Z5valueIXtlCfLf3f800000ELf40000000EEEE = weak_odr {{.*}} { float, float } { float 1.000000e+00, float 2.000000e+00 }, -template _Complex float value<1.0f + 2.0fj>; - -using V3i __attribute__((ext_vector_type(3))) = int; -// CHECK: call {{.*}} @_Z2idIDv3_iET_S1_(<3 x i32> ) -// CONST: @_Z5valueIXtlDv3_iLi1ELi2ELi3EEEE = weak_odr {{.*}} <3 x i32> -template V3i value; - -using V3f [[gnu::vector_size(12)]] = float; -// CHECK: call {{.*}} @_Z2idIDv3_fET_S1_(<3 x float> ) -// CONST: @_Z5valueIXtlDv3_fLf3f800000ELf40000000ELf40400000EEEE = weak_odr {{.*}} <3 x float> -template V3f value; diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx17.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx17.cpp index bc8a22e890415..52cf51719f05a 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx17.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx17.cpp @@ -2,7 +2,7 @@ template struct A {}; -template constexpr bool is_same = false; +template constexpr bool is_same = false; // expected-note +{{here}} template constexpr bool is_same = true; namespace String { @@ -84,32 +84,34 @@ namespace PtrMem { constexpr int B::*b = &B::b; constexpr int C::*cb = b; constexpr int D::*db = b; - constexpr int E::*ecb = cb; - constexpr int E::*edb = db; + constexpr int E::*ecb = cb; // expected-note +{{here}} + constexpr int E::*edb = db; // expected-note +{{here}} constexpr int E::*e = &E::e; constexpr int D::*de = (int D::*)e; constexpr int C::*ce = (int C::*)e; - constexpr int B::*bde = (int B::*)de; - constexpr int B::*bce = (int B::*)ce; + constexpr int B::*bde = (int B::*)de; // expected-note +{{here}} + constexpr int B::*bce = (int B::*)ce; // expected-note +{{here}} + // FIXME: This should all be accepted, but we don't yet have a representation + // nor mangling for this form of template argument. 
using Ab = A; using Ab = A; - using Abce = A; - using Abde = A; - static_assert(!is_same, ""); - static_assert(!is_same, ""); - static_assert(!is_same, ""); - static_assert(is_same>, ""); + using Abce = A; // expected-error {{not supported}} + using Abde = A; // expected-error {{not supported}} + static_assert(!is_same, ""); // expected-error {{undeclared}} expected-error {{must be a type}} + static_assert(!is_same, ""); // expected-error {{undeclared}} expected-error {{must be a type}} + static_assert(!is_same, ""); // expected-error 2{{undeclared}} expected-error {{must be a type}} + static_assert(is_same>, ""); // expected-error {{undeclared}} expected-error {{not supported}} using Ae = A; using Ae = A; - using Aecb = A; - using Aedb = A; - static_assert(!is_same, ""); - static_assert(!is_same, ""); - static_assert(!is_same, ""); - static_assert(is_same>, ""); + using Aecb = A; // expected-error {{not supported}} + using Aedb = A; // expected-error {{not supported}} + static_assert(!is_same, ""); // expected-error {{undeclared}} expected-error {{must be a type}} + static_assert(!is_same, ""); // expected-error {{undeclared}} expected-error {{must be a type}} + static_assert(!is_same, ""); // expected-error 2{{undeclared}} expected-error {{must be a type}} + static_assert(is_same>, ""); // expected-error {{undeclared}} expected-error {{not supported}} using An = A; using A0 = A; @@ -203,9 +205,9 @@ namespace Auto { struct Y : X {}; void type_affects_identity(B<&X::n>) {} - void type_affects_identity(B<(int Y::*)&X::n>) {} + void type_affects_identity(B<(int Y::*)&X::n>) {} // FIXME: expected-error {{sorry}} void type_affects_identity(B<(const int X::*)&X::n>) {} - void type_affects_identity(B<(const int Y::*)&X::n>) {} + void type_affects_identity(B<(const int Y::*)&X::n>) {} // FIXME: expected-error {{sorry}} // A case where we need to do auto-deduction, and check whether the // resulting dependent types match during partial ordering. 
These diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp index 48101cccfce0a..d514465f7d677 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp @@ -8,8 +8,8 @@ namespace std { // floating-point arguments template struct Float {}; -using F1 = Float<1.0f>; -using F1 = Float<2.0f / 2>; +using F1 = Float<1.0f>; // FIXME expected-error {{sorry}} +using F1 = Float<2.0f / 2>; // FIXME expected-error {{sorry}} struct S { int n[3]; } s; // expected-note 1+{{here}} union U { int a, b; } u; @@ -17,24 +17,24 @@ int n; // expected-note 1+{{here}} // pointers to subobjects template struct IntPtr {}; -using IPn = IntPtr<&n + 1>; -using IPn = IntPtr<&n + 1>; +using IPn = IntPtr<&n + 1>; // FIXME expected-error {{refers to subobject}} +using IPn = IntPtr<&n + 1>; // FIXME expected-error {{refers to subobject}} -using IP2 = IntPtr<&s.n[2]>; -using IP2 = IntPtr; +using IP2 = IntPtr<&s.n[2]>; // FIXME expected-error {{refers to subobject}} +using IP2 = IntPtr; // FIXME expected-error {{refers to subobject}} -using IP3 = IntPtr<&s.n[3]>; -using IP3 = IntPtr; +using IP3 = IntPtr<&s.n[3]>; // FIXME expected-error {{refers to subobject}} +using IP3 = IntPtr; // FIXME expected-error {{refers to subobject}} template struct IntRef {}; -using IRn = IntRef<*(&n + 1)>; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of 'n'}} -using IRn = IntRef<*(&n + 1)>; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of 'n'}} +using IPn = IntRef<*(&n + 1)>; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of 'n'}} +using IPn = IntRef<*(&n + 1)>; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of 'n'}} -using IR2 = IntRef; -using IR2 = IntRef<*(s.n + 2)>; +using IP2 = IntRef; // FIXME expected-error {{refers to subobject}} +using IP2 = IntRef<*(s.n + 2)>; // FIXME expected-error {{refers to subobject}} -using IR3 = IntRef; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of subobject of 's'}} -using IR3 = IntRef<*(s.n + 3)>; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of subobject of 's'}} +using IP3 = IntRef; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of subobject of 's'}} +using IP3 = IntRef<*(s.n + 3)>; // expected-error {{not a constant expression}} expected-note {{dereferenced pointer past the end of subobject of 's'}} // classes template struct Struct {}; @@ -48,12 +48,12 @@ using U1 = Union; // expected-error {{different types}} // miscellaneous scalar types template<_Complex int> struct ComplexInt {}; -using CI = ComplexInt<1 + 3i>; -using CI = ComplexInt<3i + 1>; +using CI = ComplexInt<1 + 3i>; // FIXME: expected-error {{sorry}} +using CI = ComplexInt<1 + 3i>; // FIXME: expected-error {{sorry}} template<_Complex float> struct ComplexFloat {}; -using CF = ComplexFloat<1.0f + 3.0fi>; -using CF = ComplexFloat<3.0fi + 1.0f>; +using CF = ComplexFloat<1.0f + 3.0fi>; // FIXME: expected-error {{sorry}} +using CF = ComplexFloat<1.0f + 3.0fi>; // FIXME: expected-error {{sorry}} namespace ClassNTTP { struct A { // expected-note 2{{candidate}} @@ -307,11 +307,11 @@ namespace dependent { if constexpr (N < 10) return R(); else if constexpr (N < 20) - 
return R(); + return R(); // FIXME: expected-error 2{{refers to subobject}} else if constexpr (N < 30) return S<&n>(); else if constexpr (N < 40) - return S<&vn.v>(); + return S<&vn.v>(); // FIXME: expected-error 2{{refers to subobject}} else if constexpr (N < 50) return T{n}>(); else if constexpr (N < 60) @@ -322,15 +322,15 @@ namespace dependent { return T{&vn.v}>(); } template void check() { - auto v = f(); - auto w = f(); + auto v = f(); // FIXME: expected-note 2{{instantiation of}} + auto w = f(); // FIXME: expected-note 2{{instantiation of}} static_assert(!__is_same(decltype(v), decltype(w))); static_assert(v != w); } template void check<0>(); - template void check<10>(); + template void check<10>(); // FIXME: expected-note 2{{instantiation of}} template void check<20>(); - template void check<30>(); + template void check<30>(); // FIXME: expected-note 2{{instantiation of}} template void check<40>(); template void check<50>(); template void check<60>(); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 8a4d3cbcf9393..f1008319ddc74 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1493,11 +1493,6 @@ bool CursorVisitor::VisitTemplateArgumentLoc(const TemplateArgumentLoc &TAL) { return Visit(MakeCXCursor(E, StmtParent, TU, RegionOfInterest)); return false; - case TemplateArgument::UncommonValue: - if (Expr *E = TAL.getSourceUncommonValueExpression()) - return Visit(MakeCXCursor(E, StmtParent, TU, RegionOfInterest)); - return false; - case TemplateArgument::NullPtr: if (Expr *E = TAL.getSourceNullPtrExpression()) return Visit(MakeCXCursor(E, StmtParent, TU, RegionOfInterest)); diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 851b418b6d7be..180cf1858d04c 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -1375,9 +1375,6 @@ enum CXTemplateArgumentKind clang_Cursor_getTemplateArgumentKind(CXCursor C, return CXTemplateArgumentKind_NullPtr; case TemplateArgument::Integral: return CXTemplateArgumentKind_Integral; - case TemplateArgument::UncommonValue: - // FIXME: Expose these values. - return CXTemplateArgumentKind_Invalid; case TemplateArgument::Template: return CXTemplateArgumentKind_Template; case TemplateArgument::TemplateExpansion: From af0dbaaa38f54b0366177aae43545a8848d3fe56 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 10:12:40 -0800 Subject: [PATCH 114/378] Revert "Following up on PR48517, fix handling of template arguments that refer" This reverts commit 8c1f2d15b826591cdf6bd6b468b8a7d23377b29e. This is part of 5 commits being reverted due to https://crbug.com/1161059. See bug for repro. 
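
For context, the reverted change concerned non-type template arguments that name declarations belonging to an enclosing template, so the argument's identity is only known at instantiation. A small assumed example of that pattern (not taken from the patch itself):

template <int *P> struct S {};

template <typename T> struct Outer {
  static int n;
  // &n names Outer<T>::n, a declaration of the enclosing template, so the
  // template argument stays dependent until Outer is instantiated.
  using U = S<&n>;
};
template <typename T> int Outer<T>::n = 0;

// Instantiation resolves the argument to the concrete object &Outer<int>::n.
Outer<int>::U u;

int main() {}
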
--- clang/include/clang/AST/Expr.h | 14 +-- clang/include/clang/AST/TemplateBase.h | 13 +-- clang/include/clang/Sema/Sema.h | 3 +- clang/lib/AST/ComputeDependence.cpp | 13 +-- clang/lib/AST/Expr.cpp | 35 +++--- clang/lib/AST/ExprCXX.cpp | 11 +- clang/lib/AST/ExprConstant.cpp | 101 ++++++++---------- clang/lib/AST/TemplateBase.cpp | 42 ++++++-- clang/lib/Sema/SemaOverload.cpp | 15 +-- clang/lib/Sema/SemaTemplate.cpp | 30 +++--- clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 +- .../distribute_dist_schedule_messages.cpp | 2 +- ...te_parallel_for_dist_schedule_messages.cpp | 2 +- ...rallel_for_simd_dist_schedule_messages.cpp | 2 +- ...distribute_simd_dist_schedule_messages.cpp | 2 +- ...et_parallel_for_simd_collapse_messages.cpp | 2 +- ...get_parallel_for_simd_ordered_messages.cpp | 2 +- .../OpenMP/target_simd_collapse_messages.cpp | 2 +- ...eams_distribute_dist_schedule_messages.cpp | 2 +- ...te_parallel_for_dist_schedule_messages.cpp | 2 +- ...rallel_for_simd_dist_schedule_messages.cpp | 2 +- ...distribute_simd_dist_schedule_messages.cpp | 2 +- .../OpenMP/target_update_from_messages.cpp | 2 +- .../test/OpenMP/target_update_to_messages.cpp | 2 +- clang/test/OpenMP/task_messages.cpp | 4 +- ...eams_distribute_dist_schedule_messages.cpp | 2 +- ...te_parallel_for_dist_schedule_messages.cpp | 2 +- ...rallel_for_simd_dist_schedule_messages.cpp | 2 +- ...distribute_simd_dist_schedule_messages.cpp | 2 +- .../SemaCXX/warn-unused-lambda-capture.cpp | 2 +- ...e_cxx17.cpp => temp_arg_nontype_cxx1z.cpp} | 10 -- .../SemaTemplate/temp_arg_nontype_cxx20.cpp | 44 -------- 32 files changed, 158 insertions(+), 221 deletions(-) rename clang/test/SemaTemplate/{temp_arg_nontype_cxx17.cpp => temp_arg_nontype_cxx1z.cpp} (98%) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index e1c3b69441420..c8d87ec48a3f0 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -578,12 +578,12 @@ class Expr : public ValueStmt { struct EvalStatus { /// Whether the evaluated expression has side effects. /// For example, (f() && 0) can be folded, but it still has side effects. - bool HasSideEffects = false; + bool HasSideEffects; /// Whether the evaluation hit undefined behavior. /// For example, 1.0 / 0.0 can be folded to Inf, but has undefined behavior. /// Likewise, INT_MAX + 1 can be folded to INT_MIN, but has UB. - bool HasUndefinedBehavior = false; + bool HasUndefinedBehavior; /// Diag - If this is non-null, it will be filled in with a stack of notes /// indicating why evaluation failed (or why it failed to produce a constant @@ -592,7 +592,10 @@ class Expr : public ValueStmt { /// foldable. If the expression is foldable, but not a constant expression, /// the notes will describes why it isn't a constant expression. If the /// expression *is* a constant expression, no notes will be produced. - SmallVectorImpl *Diag = nullptr; + SmallVectorImpl *Diag; + + EvalStatus() + : HasSideEffects(false), HasUndefinedBehavior(false), Diag(nullptr) {} // hasSideEffects - Return true if the evaluated expression has // side effects. @@ -603,11 +606,8 @@ class Expr : public ValueStmt { /// EvalResult is a struct with detailed info about an evaluated expression. struct EvalResult : EvalStatus { - /// This is the value the expression can be folded to. + /// Val - This is the value the expression can be folded to. APValue Val; - /// Indicates whether Val contains a pointer or reference or pointer to - /// member naming a templated entity, and thus the value is dependent. 
- bool Dependent = false; // isGlobalLValue - Return true if the evaluated lvalue expression // is global. diff --git a/clang/include/clang/AST/TemplateBase.h b/clang/include/clang/AST/TemplateBase.h index abf873a7ee40f..7967f8a91214d 100644 --- a/clang/include/clang/AST/TemplateBase.h +++ b/clang/include/clang/AST/TemplateBase.h @@ -252,12 +252,6 @@ class TemplateArgument { /// Whether this template argument is dependent on a template /// parameter such that its result can change from one instantiation to /// another. - /// - /// It's not always meaningful to ask whether a template argument is - /// dependent before it's been converted to match a template parameter; - /// whether a non-type template argument is dependent depends on the - /// corresponding parameter. For an unconverted template argument, this - /// returns true if the argument *might* be dependent. bool isDependent() const; /// Whether this template argument is dependent on a template @@ -680,6 +674,13 @@ struct alignas(void *) ASTTemplateKWAndArgsInfo { void initializeFrom(SourceLocation TemplateKWLoc, const TemplateArgumentListInfo &List, TemplateArgumentLoc *OutArgArray); + // FIXME: The parameter Deps is the result populated by this method, the + // caller doesn't need it since it is populated by computeDependence. remove + // it. + void initializeFrom(SourceLocation TemplateKWLoc, + const TemplateArgumentListInfo &List, + TemplateArgumentLoc *OutArgArray, + TemplateArgumentDependence &Deps); void initializeFrom(SourceLocation TemplateKWLoc); void copyInto(const TemplateArgumentLoc *ArgArray, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 2c781eb88415f..6b81494e8eff0 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3353,8 +3353,7 @@ class Sema final { llvm::APSInt &Value, CCEKind CCE); ExprResult CheckConvertedConstantExpression(Expr *From, QualType T, APValue &Value, CCEKind CCE, - NamedDecl *Dest = nullptr, - bool *ValueDependent = nullptr); + NamedDecl *Dest = nullptr); /// Abstract base class used to perform a contextual implicit /// conversion from an expression to any type passing a filter. diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index 5262e3cbe233b..4026fdc76fd6f 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -64,7 +64,7 @@ ExprDependence clang::computeDependence(UnaryOperator *E, if (VD && VD->isTemplated()) { auto *VarD = dyn_cast(VD); if (!VarD || !VarD->hasLocalStorage()) - Dep |= ExprDependence::ValueInstantiation; + Dep |= ExprDependence::Value; } } } @@ -443,21 +443,12 @@ ExprDependence clang::computeDependence(DeclRefExpr *E, const ASTContext &Ctx) { if (auto *FirstArg = E->getTemplateArgs()) { unsigned NumArgs = E->getNumTemplateArgs(); for (auto *Arg = FirstArg, *End = FirstArg + NumArgs; Arg < End; ++Arg) - Deps |= toExprDependence(Arg->getArgument().getDependence() & - ~TemplateArgumentDependence::Dependent); + Deps |= toExprDependence(Arg->getArgument().getDependence()); } auto *Decl = E->getDecl(); - auto *Found = E->getFoundDecl(); auto Type = E->getType(); - // FIXME: For a ParmVarDecl referenced in a function signature, we don't know - // its dependence yet! 
- if (!isa(Decl)) { - if (Decl->getDeclContext()->isDependentContext() || - (Found && Found->getDeclContext()->isDependentContext())) - Deps |= ExprDependence::Instantiation; - } if (Decl->isParameterPack()) Deps |= ExprDependence::UnexpandedPack; Deps |= toExprDependence(Type->getDependence()) & ExprDependence::Error; diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index dafa7136ecb4e..a274bf37a407a 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -416,9 +416,12 @@ DeclRefExpr::DeclRefExpr(const ASTContext &Ctx, RefersToEnclosingVariableOrCapture; DeclRefExprBits.NonOdrUseReason = NOUR; if (TemplateArgs) { + auto Deps = TemplateArgumentDependence::None; getTrailingObjects()->initializeFrom( - TemplateKWLoc, *TemplateArgs, - getTrailingObjects()); + TemplateKWLoc, *TemplateArgs, getTrailingObjects(), + Deps); + assert(!(Deps & TemplateArgumentDependence::Dependent) && + "built a DeclRefExpr with dependent template args"); } else if (TemplateKWLoc.isValid()) { getTrailingObjects()->initializeFrom( TemplateKWLoc); @@ -1521,8 +1524,16 @@ MemberExpr *MemberExpr::Create( MemberExpr *E = new (Mem) MemberExpr(Base, IsArrow, OperatorLoc, MemberDecl, NameInfo, T, VK, OK, NOUR); - // FIXME: Move this into the constructor. + // FIXME: remove remaining dependence computation to computeDependence(). + auto Deps = E->getDependence(); if (HasQualOrFound) { + // FIXME: Wrong. We should be looking at the member declaration we found. + if (QualifierLoc && QualifierLoc.getNestedNameSpecifier()->isDependent()) + Deps |= ExprDependence::TypeValueInstantiation; + else if (QualifierLoc && + QualifierLoc.getNestedNameSpecifier()->isInstantiationDependent()) + Deps |= ExprDependence::Instantiation; + E->MemberExprBits.HasQualifierOrFoundDecl = true; MemberExprNameQualifier *NQ = @@ -1535,26 +1546,16 @@ MemberExpr *MemberExpr::Create( TemplateArgs || TemplateKWLoc.isValid(); if (TemplateArgs) { + auto TemplateArgDeps = TemplateArgumentDependence::None; E->getTrailingObjects()->initializeFrom( TemplateKWLoc, *TemplateArgs, - E->getTrailingObjects()); + E->getTrailingObjects(), TemplateArgDeps); + if (TemplateArgDeps & TemplateArgumentDependence::Instantiation) + Deps |= ExprDependence::Instantiation; } else if (TemplateKWLoc.isValid()) { E->getTrailingObjects()->initializeFrom( TemplateKWLoc); } - - // FIXME: remove remaining dependence computation to computeDependence(). - auto Deps = E->getDependence(); - if (NestedNameSpecifier *Qual = E->getQualifier()) { - // FIXME: Wrong. We should be looking at the member declaration we found. 
- if (Qual->isDependent()) - Deps |= ExprDependence::TypeValueInstantiation; - else if (Qual->isInstantiationDependent()) - Deps |= ExprDependence::Instantiation; - } - if (TemplateSpecializationType::anyInstantiationDependentTemplateArguments( - E->template_arguments())) - Deps |= ExprDependence::Instantiation; E->setDependence(Deps); return E; diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index e1f658923519a..8dc9d4296e149 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -433,8 +433,9 @@ OverloadExpr::OverloadExpr(StmtClass SC, const ASTContext &Context, } if (TemplateArgs) { + auto Deps = TemplateArgumentDependence::None; getTrailingASTTemplateKWAndArgsInfo()->initializeFrom( - TemplateKWLoc, *TemplateArgs, getTrailingTemplateArgumentLoc()); + TemplateKWLoc, *TemplateArgs, getTrailingTemplateArgumentLoc(), Deps); } else if (TemplateKWLoc.isValid()) { getTrailingASTTemplateKWAndArgsInfo()->initializeFrom(TemplateKWLoc); } @@ -463,8 +464,9 @@ DependentScopeDeclRefExpr::DependentScopeDeclRefExpr( DependentScopeDeclRefExprBits.HasTemplateKWAndArgsInfo = (Args != nullptr) || TemplateKWLoc.isValid(); if (Args) { + auto Deps = TemplateArgumentDependence::None; getTrailingObjects()->initializeFrom( - TemplateKWLoc, *Args, getTrailingObjects()); + TemplateKWLoc, *Args, getTrailingObjects(), Deps); } else if (TemplateKWLoc.isValid()) { getTrailingObjects()->initializeFrom( TemplateKWLoc); @@ -1374,9 +1376,10 @@ CXXDependentScopeMemberExpr::CXXDependentScopeMemberExpr( CXXDependentScopeMemberExprBits.OperatorLoc = OperatorLoc; if (TemplateArgs) { + auto Deps = TemplateArgumentDependence::None; getTrailingObjects()->initializeFrom( - TemplateKWLoc, *TemplateArgs, - getTrailingObjects()); + TemplateKWLoc, *TemplateArgs, getTrailingObjects(), + Deps); } else if (TemplateKWLoc.isValid()) { getTrailingObjects()->initializeFrom( TemplateKWLoc); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 231a8c3bc8bd1..56181bbe11668 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1819,8 +1819,7 @@ static bool EvaluateFloat(const Expr *E, APFloat &Result, EvalInfo &Info); static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info); static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result, EvalInfo &Info); -static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result, - bool &Dependent); +static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result); /// Evaluate an integer or fixed point expression into an APResult. static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, @@ -2108,8 +2107,7 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, QualType Type, const APValue &Value, ConstantExprKind Kind, SourceLocation SubobjectLoc, - CheckedTemporaries &CheckedTemps, - bool &Dependent); + CheckedTemporaries &CheckedTemps); /// Check that this reference or pointer core constant expression is a valid /// value for an address or reference constant expression. 
Return true if we @@ -2117,8 +2115,7 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, QualType Type, const LValue &LVal, ConstantExprKind Kind, - CheckedTemporaries &CheckedTemps, - bool &Dependent) { + CheckedTemporaries &CheckedTemps) { bool IsReferenceType = Type->isReferenceType(); APValue::LValueBase Base = LVal.getLValueBase(); @@ -2203,8 +2200,6 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, } if (BaseVD) { - Dependent |= BaseVD->isTemplated(); - if (const VarDecl *Var = dyn_cast(BaseVD)) { // Check if this is a thread-local variable. if (Var->getTLSKind()) @@ -2235,9 +2230,6 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, } } else if (const auto *MTE = dyn_cast_or_null(BaseE)) { - if (auto *Extending = MTE->getExtendingDecl()) - Dependent |= Extending->isTemplated(); - if (CheckedTemps.insert(MTE).second) { QualType TempType = getType(Base); if (TempType.isDestructedType()) { @@ -2250,8 +2242,8 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, APValue *V = MTE->getOrCreateValue(false); assert(V && "evasluation result refers to uninitialised temporary"); if (!CheckEvaluationResult(CheckEvaluationResultKind::ConstantExpression, - Info, MTE->getExprLoc(), TempType, *V, Kind, - SourceLocation(), CheckedTemps, Dependent)) + Info, MTE->getExprLoc(), TempType, *V, + Kind, SourceLocation(), CheckedTemps)) return false; } } @@ -2280,15 +2272,13 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, /// Member pointers are constant expressions unless they point to a /// non-virtual dllimport member function. -static bool -CheckMemberPointerConstantExpression(EvalInfo &Info, SourceLocation Loc, - QualType Type, const APValue &Value, - ConstantExprKind Kind, bool &Dependent) { +static bool CheckMemberPointerConstantExpression(EvalInfo &Info, + SourceLocation Loc, + QualType Type, + const APValue &Value, + ConstantExprKind Kind) { const ValueDecl *Member = Value.getMemberPointerDecl(); - if (!Member) - return true; - Dependent |= Member->isTemplated(); - const auto *FD = dyn_cast(Member); + const auto *FD = dyn_cast_or_null(Member); if (!FD) return true; if (FD->isConsteval()) { @@ -2337,8 +2327,7 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, QualType Type, const APValue &Value, ConstantExprKind Kind, SourceLocation SubobjectLoc, - CheckedTemporaries &CheckedTemps, - bool &Dependent) { + CheckedTemporaries &CheckedTemps) { if (!Value.hasValue()) { Info.FFDiag(DiagLoc, diag::note_constexpr_uninitialized) << true << Type; @@ -2360,20 +2349,20 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, for (unsigned I = 0, N = Value.getArrayInitializedElts(); I != N; ++I) { if (!CheckEvaluationResult(CERK, Info, DiagLoc, EltTy, Value.getArrayInitializedElt(I), Kind, - SubobjectLoc, CheckedTemps, Dependent)) + SubobjectLoc, CheckedTemps)) return false; } if (!Value.hasArrayFiller()) return true; return CheckEvaluationResult(CERK, Info, DiagLoc, EltTy, Value.getArrayFiller(), Kind, SubobjectLoc, - CheckedTemps, Dependent); + CheckedTemps); } if (Value.isUnion() && Value.getUnionField()) { return CheckEvaluationResult( CERK, Info, DiagLoc, Value.getUnionField()->getType(), Value.getUnionValue(), Kind, Value.getUnionField()->getLocation(), - CheckedTemps, Dependent); + CheckedTemps); } if (Value.isStruct()) { RecordDecl *RD = Type->castAs()->getDecl(); 
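
The CheckEvaluationResult hunks above keep the function's recursive shape: an evaluated value is checked element by element for arrays, through the active member for unions, and base by base and field by field for structs, before the lvalue and member-pointer checks run. A stand-alone sketch of that recursion over a toy value tree, using assumed types rather than Clang's APValue:

#include <iostream>
#include <vector>

// Toy evaluated value: a leaf scalar, or a node whose children model array
// elements, struct bases/fields, or a union's single active member.
struct Value {
  int leaf = 0;                // meaningful only when children is empty
  std::vector<Value> children; // sub-objects to check recursively
};

// Placeholder leaf rule standing in for the real per-value checks.
bool checkLeaf(const Value &v) { return v.leaf >= 0; }

bool checkEvaluationResult(const Value &v) {
  if (v.children.empty())
    return checkLeaf(v);
  for (const Value &sub : v.children)   // recurse into every sub-object
    if (!checkEvaluationResult(sub))
      return false;
  return true;
}

int main() {
  Value ok{0, {Value{1, {}}, Value{0, {Value{2, {}}}}}};
  Value bad{0, {Value{-1, {}}}};
  std::cout << checkEvaluationResult(ok) << ' '
            << checkEvaluationResult(bad) << '\n'; // prints "1 0"
}
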
@@ -2382,7 +2371,7 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, for (const CXXBaseSpecifier &BS : CD->bases()) { if (!CheckEvaluationResult(CERK, Info, DiagLoc, BS.getType(), Value.getStructBase(BaseIndex), Kind, - BS.getBeginLoc(), CheckedTemps, Dependent)) + BS.getBeginLoc(), CheckedTemps)) return false; ++BaseIndex; } @@ -2392,8 +2381,8 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, continue; if (!CheckEvaluationResult(CERK, Info, DiagLoc, I->getType(), - Value.getStructField(I->getFieldIndex()), Kind, - I->getLocation(), CheckedTemps, Dependent)) + Value.getStructField(I->getFieldIndex()), + Kind, I->getLocation(), CheckedTemps)) return false; } } @@ -2403,13 +2392,12 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, LValue LVal; LVal.setFrom(Info.Ctx, Value); return CheckLValueConstantExpression(Info, DiagLoc, Type, LVal, Kind, - CheckedTemps, Dependent); + CheckedTemps); } if (Value.isMemberPointer() && CERK == CheckEvaluationResultKind::ConstantExpression) - return CheckMemberPointerConstantExpression(Info, DiagLoc, Type, Value, - Kind, Dependent); + return CheckMemberPointerConstantExpression(Info, DiagLoc, Type, Value, Kind); // Everything else is fine. return true; @@ -2420,7 +2408,7 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, /// check that the expression is of literal type. static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, const APValue &Value, - ConstantExprKind Kind, bool &Dependent) { + ConstantExprKind Kind) { // Nothing to check for a constant expression of type 'cv void'. if (Type->isVoidType()) return true; @@ -2428,18 +2416,17 @@ static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, CheckedTemporaries CheckedTemps; return CheckEvaluationResult(CheckEvaluationResultKind::ConstantExpression, Info, DiagLoc, Type, Value, Kind, - SourceLocation(), CheckedTemps, Dependent); + SourceLocation(), CheckedTemps); } /// Check that this evaluated value is fully-initialized and can be loaded by /// an lvalue-to-rvalue conversion. static bool CheckFullyInitialized(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, const APValue &Value) { - bool Dependent = false; CheckedTemporaries CheckedTemps; return CheckEvaluationResult( CheckEvaluationResultKind::FullyInitialized, Info, DiagLoc, Type, Value, - ConstantExprKind::Normal, SourceLocation(), CheckedTemps, Dependent); + ConstantExprKind::Normal, SourceLocation(), CheckedTemps); } /// Enforce C++2a [expr.const]/4.17, which disallows new-expressions unless @@ -11111,9 +11098,7 @@ static bool EvaluateBuiltinConstantP(EvalInfo &Info, const Expr *Arg) { ArgType->isAnyComplexType() || ArgType->isPointerType() || ArgType->isNullPtrType()) { APValue V; - bool Dependent = false; - if (!::EvaluateAsRValue(Info, Arg, V, Dependent) || - Info.EvalStatus.HasSideEffects) { + if (!::EvaluateAsRValue(Info, Arg, V) || Info.EvalStatus.HasSideEffects) { Fold.keepDiagnostics(); return false; } @@ -11415,8 +11400,7 @@ static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type, // It's possible for us to be given GLValues if we're called via // Expr::tryEvaluateObjectSize. 
APValue RVal; - bool Dependent = false; - if (!EvaluateAsRValue(Info, E, RVal, Dependent)) + if (!EvaluateAsRValue(Info, E, RVal)) return false; LVal.setFrom(Info.Ctx, RVal); } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), LVal, Info, @@ -12845,9 +12829,8 @@ bool RecordExprEvaluator::VisitBinCmp(const BinaryOperator *E) { LV.set(VD); if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result)) return false; - bool Dependent = false; return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result, - ConstantExprKind::Normal, Dependent); + ConstantExprKind::Normal); }; return EvaluateComparisonBinaryOperator(Info, E, OnSuccess, [&]() { return ExprEvaluatorBaseTy::VisitBinCmp(E); @@ -14611,8 +14594,7 @@ static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, /// EvaluateAsRValue - Try to evaluate this expression, performing an implicit /// lvalue-to-rvalue cast if it is an lvalue. -static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result, - bool &Dependent) { +static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) { assert(!E->isValueDependent()); if (Info.EnableNewConstInterp) { if (!Info.Ctx.getInterpContext().evaluateAsRValue(Info, E, Result)) @@ -14637,7 +14619,7 @@ static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result, // Check this core constant expression is a constant expression. return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result, - ConstantExprKind::Normal, Dependent) && + ConstantExprKind::Normal) && CheckMemoryLeaks(Info); } @@ -14683,7 +14665,7 @@ static bool EvaluateAsRValue(const Expr *E, Expr::EvalResult &Result, if (FastEvaluateAsRValue(E, Result, Ctx, IsConst)) return IsConst; - return EvaluateAsRValue(Info, E, Result.Val, Result.Dependent); + return EvaluateAsRValue(Info, E, Result.Val); } static bool EvaluateAsInt(const Expr *E, Expr::EvalResult &ExprResult, @@ -14793,9 +14775,9 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx, CheckedTemporaries CheckedTemps; if (!EvaluateLValue(this, LV, Info) || !Info.discardCleanups() || Result.HasSideEffects || - !CheckLValueConstantExpression( - Info, getExprLoc(), Ctx.getLValueReferenceType(getType()), LV, - ConstantExprKind::Normal, CheckedTemps, Result.Dependent)) + !CheckLValueConstantExpression(Info, getExprLoc(), + Ctx.getLValueReferenceType(getType()), LV, + ConstantExprKind::Normal, CheckedTemps)) return false; LV.moveInto(Result.Val); @@ -14854,7 +14836,7 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx, llvm_unreachable("Unhandled cleanup; missing full expression marker?"); if (!CheckConstantExpression(Info, getExprLoc(), getStorageType(Ctx, this), - Result.Val, Kind, Result.Dependent)) + Result.Val, Kind)) return false; if (!CheckMemoryLeaks(Info)) return false; @@ -14918,9 +14900,8 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, if (!Info.discardCleanups()) llvm_unreachable("Unhandled cleanup; missing full expression marker?"); } - bool Dependent = false; return CheckConstantExpression(Info, DeclLoc, DeclTy, Value, - ConstantExprKind::Normal, Dependent) && + ConstantExprKind::Normal) && CheckMemoryLeaks(Info); } @@ -14987,7 +14968,7 @@ APSInt Expr::EvaluateKnownConstIntCheckOverflow( Info.InConstantContext = true; Info.CheckingForUndefinedBehavior = true; - bool Result = ::EvaluateAsRValue(this, EVResult, Ctx, Info); + bool Result = ::EvaluateAsRValue(Info, this, EVResult.Val); (void)Result; 
assert(Result && "Could not evaluate expression"); assert(EVResult.Val.isInt() && "Expression did not evaluate to integer"); @@ -14999,10 +14980,13 @@ void Expr::EvaluateForOverflow(const ASTContext &Ctx) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); + bool IsConst; EvalResult EVResult; - EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); - Info.CheckingForUndefinedBehavior = true; - (void)::EvaluateAsRValue(this, EVResult, Ctx, Info); + if (!FastEvaluateAsRValue(this, EVResult, Ctx, IsConst)) { + EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); + Info.CheckingForUndefinedBehavior = true; + (void)::EvaluateAsRValue(Info, this, EVResult.Val); + } } bool Expr::EvalResult::isGlobalLValue() const { @@ -15552,9 +15536,8 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result, EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); APValue Scratch; - bool Dependent = false; bool IsConstExpr = - ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch, Dependent) && + ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch) && // FIXME: We don't produce a diagnostic for this, but the callers that // call us on arbitrary full-expressions should generally not care. Info.discardCleanups() && !Status.HasSideEffects; diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp index 44d52c56ffbbc..b6af655deff04 100644 --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -131,17 +131,25 @@ TemplateArgumentDependence TemplateArgument::getDependence() const { return TemplateArgumentDependence::Dependent | TemplateArgumentDependence::Instantiation; + case Declaration: { + auto *DC = dyn_cast(getAsDecl()); + if (!DC) + DC = getAsDecl()->getDeclContext(); + if (DC->isDependentContext()) + Deps = TemplateArgumentDependence::Dependent | + TemplateArgumentDependence::Instantiation; + return Deps; + } + case NullPtr: case Integral: - case Declaration: return TemplateArgumentDependence::None; case Expression: Deps = toTemplateArgumentDependence(getAsExpr()->getDependence()); - // Instantiation-dependent expression arguments are considered dependent - // until they're resolved to another form. 
- if (Deps & TemplateArgumentDependence::Instantiation) - Deps |= TemplateArgumentDependence::Dependent; + if (isa(getAsExpr())) + Deps |= TemplateArgumentDependence::Dependent | + TemplateArgumentDependence::Instantiation; return Deps; case Pack: @@ -536,8 +544,8 @@ ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo( NumTemplateArgs = Info.size(); TemplateArgumentLoc *ArgBuffer = getTrailingObjects(); - std::uninitialized_copy(Info.arguments().begin(), Info.arguments().end(), - ArgBuffer); + for (unsigned i = 0; i != NumTemplateArgs; ++i) + new (&ArgBuffer[i]) TemplateArgumentLoc(Info[i]); } void ASTTemplateKWAndArgsInfo::initializeFrom( @@ -547,8 +555,9 @@ void ASTTemplateKWAndArgsInfo::initializeFrom( LAngleLoc = Info.getLAngleLoc(); RAngleLoc = Info.getRAngleLoc(); NumTemplateArgs = Info.size(); - std::uninitialized_copy(Info.arguments().begin(), Info.arguments().end(), - OutArgArray); + + for (unsigned i = 0; i != NumTemplateArgs; ++i) + new (&OutArgArray[i]) TemplateArgumentLoc(Info[i]); } void ASTTemplateKWAndArgsInfo::initializeFrom(SourceLocation TemplateKWLoc) { @@ -559,6 +568,21 @@ void ASTTemplateKWAndArgsInfo::initializeFrom(SourceLocation TemplateKWLoc) { NumTemplateArgs = 0; } +void ASTTemplateKWAndArgsInfo::initializeFrom( + SourceLocation TemplateKWLoc, const TemplateArgumentListInfo &Info, + TemplateArgumentLoc *OutArgArray, TemplateArgumentDependence &Deps) { + this->TemplateKWLoc = TemplateKWLoc; + LAngleLoc = Info.getLAngleLoc(); + RAngleLoc = Info.getRAngleLoc(); + NumTemplateArgs = Info.size(); + + for (unsigned i = 0; i != NumTemplateArgs; ++i) { + Deps |= Info[i].getArgument().getDependence(); + + new (&OutArgArray[i]) TemplateArgumentLoc(Info[i]); + } +} + void ASTTemplateKWAndArgsInfo::copyInto(const TemplateArgumentLoc *ArgArray, TemplateArgumentListInfo &Info) const { Info.setLAngleLoc(LAngleLoc); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index ac52612ea3b0c..13d2125d1a28d 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -5619,8 +5619,7 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, QualType T, APValue &Value, Sema::CCEKind CCE, bool RequireInt, - NamedDecl *Dest, - bool *ValueDependent) { + NamedDecl *Dest) { assert(S.getLangOpts().CPlusPlus11 && "converted constant expression outside C++11"); @@ -5744,8 +5743,6 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, if (Result.get()->isValueDependent()) { Value = APValue(); - if (ValueDependent) - *ValueDependent = true; return Result; } @@ -5769,8 +5766,6 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, Result = ExprError(); } else { Value = Eval.Val; - if (ValueDependent) - *ValueDependent = Eval.Dependent; if (Notes.empty()) { // It's a constant expression. 
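
The initializeFrom overload added just above (the one taking a TemplateArgumentDependence out-parameter) copies each TemplateArgumentLoc into the trailing argument array while OR-ing that argument's dependence into Deps for the caller to inspect. A minimal stand-alone sketch of that copy-and-accumulate pattern, with assumed toy types in place of Clang's:

#include <cstdint>
#include <iostream>
#include <vector>

// Toy dependence bits and argument type; illustrative only.
enum Dependence : std::uint8_t { None = 0, Instantiation = 1, Dependent = 2 };

struct Arg {
  int value;
  std::uint8_t dependence; // bitmask of Dependence flags
};

// Copy the arguments into 'out' and accumulate their combined dependence,
// mirroring the loop that placement-constructs into trailing storage.
void initializeFrom(const std::vector<Arg> &in, std::vector<Arg> &out,
                    std::uint8_t &deps) {
  for (const Arg &a : in) {
    deps |= a.dependence;
    out.push_back(a);
  }
}

int main() {
  std::vector<Arg> args{{1, None}, {2, Instantiation}};
  std::vector<Arg> copied;
  std::uint8_t deps = None;
  initializeFrom(args, copied, deps);
  std::cout << "copied " << copied.size()
            << " args, deps mask = " << int(deps) << '\n';
}
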
@@ -5801,10 +5796,9 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, ExprResult Sema::CheckConvertedConstantExpression(Expr *From, QualType T, APValue &Value, CCEKind CCE, - NamedDecl *Dest, - bool *ValueDependent) { + NamedDecl *Dest) { return ::CheckConvertedConstantExpression(*this, From, T, Value, CCE, false, - Dest, ValueDependent); + Dest); } ExprResult Sema::CheckConvertedConstantExpression(Expr *From, QualType T, @@ -5814,8 +5808,7 @@ ExprResult Sema::CheckConvertedConstantExpression(Expr *From, QualType T, APValue V; auto R = ::CheckConvertedConstantExpression(*this, From, T, V, CCE, true, - /*Dest=*/nullptr, - /*ValueDependent=*/nullptr); + /*Dest=*/nullptr); if (!R.isInvalid() && !R.get()->isValueDependent()) Value = V.getInt(); return R; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 7ebd9be831ad5..64259767d98a6 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -6620,12 +6620,6 @@ CheckTemplateArgumentAddressOfObjectOrFunction(Sema &S, Arg, ArgType)) return true; - // Don't build a resolved template argument naming a dependent declaration. - if (Entity->isTemplated()) { - Converted = TemplateArgument(ArgIn); - return false; - } - // Create the template argument. Converted = TemplateArgument(cast(Entity->getCanonicalDecl()), S.Context.getCanonicalType(ParamType)); @@ -6640,6 +6634,8 @@ static bool CheckTemplateArgumentPointerToMember(Sema &S, QualType ParamType, Expr *&ResultArg, TemplateArgument &Converted) { + bool Invalid = false; + Expr *Arg = ResultArg; bool ObjCLifetimeConversion; @@ -6655,7 +6651,7 @@ static bool CheckTemplateArgumentPointerToMember(Sema &S, // See http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#773 bool ExtraParens = false; while (ParenExpr *Parens = dyn_cast(Arg)) { - if (!ExtraParens) { + if (!Invalid && !ExtraParens) { S.Diag(Arg->getBeginLoc(), S.getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_template_arg_extra_parens @@ -6684,8 +6680,13 @@ static bool CheckTemplateArgumentPointerToMember(Sema &S, ValueDecl *VD = DRE->getDecl(); if (VD->getType()->isMemberPointerType()) { if (isa(VD)) { - Converted = TemplateArgument(Arg); - return false; + if (Arg->isTypeDependent() || Arg->isValueDependent()) { + Converted = TemplateArgument(Arg); + } else { + VD = cast(VD->getCanonicalDecl()); + Converted = TemplateArgument(VD, ParamType); + } + return Invalid; } } @@ -6744,7 +6745,7 @@ static bool CheckTemplateArgumentPointerToMember(Sema &S, ValueDecl *D = cast(DRE->getDecl()->getCanonicalDecl()); Converted = TemplateArgument(D, S.Context.getCanonicalType(ParamType)); } - return false; + return Invalid; } // We found something else, but we don't know specifically what it is. @@ -6921,17 +6922,14 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // A template-argument for a non-type template parameter shall be // a converted constant expression of the type of the template-parameter. APValue Value; - bool ValueDependent = false; ExprResult ArgResult = CheckConvertedConstantExpression( - Arg, ParamType, Value, CCEK_TemplateArg, Param, &ValueDependent); + Arg, ParamType, Value, CCEK_TemplateArg, Param); if (ArgResult.isInvalid()) return ExprError(); // For a value-dependent argument, CheckConvertedConstantExpression is - // permitted (and expected) to be unable to determine a value. 
We might find - // the evaluated result refers to a dependent declaration even though the - // template argument is not a value-dependent expression. - if (ValueDependent) { + // permitted (and expected) to be unable to determine a value. + if (ArgResult.get()->isValueDependent()) { Converted = TemplateArgument(ArgResult.get()); return ArgResult; } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index cbf4fb1de4656..39ea9e06e7b1f 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -3227,8 +3227,7 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation, if (FunctionDecl *Pattern = Function->getInstantiatedFromMemberFunction()) { - if (TSK != TSK_ImplicitInstantiation && - Function->hasAttr()) + if (Function->hasAttr()) continue; MemberSpecializationInfo *MSInfo = @@ -3273,8 +3272,7 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation, continue; if (Var->isStaticDataMember()) { - if (TSK != TSK_ImplicitInstantiation && - Var->hasAttr()) + if (Var->hasAttr()) continue; MemberSpecializationInfo *MSInfo = Var->getMemberSpecializationInfo(); @@ -3291,7 +3289,7 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation, SuppressNew) continue; - if (TSK != TSK_ExplicitInstantiationDeclaration) { + if (TSK == TSK_ExplicitInstantiationDefinition) { // C++0x [temp.explicit]p8: // An explicit instantiation definition that names a class template // specialization explicitly instantiates the class template diff --git a/clang/test/OpenMP/distribute_dist_schedule_messages.cpp b/clang/test/OpenMP/distribute_dist_schedule_messages.cpp index 0f7b2172f5a54..cd232f40feb43 100644 --- a/clang/test/OpenMP/distribute_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/distribute_dist_schedule_messages.cpp @@ -35,7 +35,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp distribute dist_schedule (static, S1) // expected-error {{'S1' does not refer to a value}} for (int i = 0; i < 10; ++i) foo(); - #pragma omp distribute dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} + #pragma omp distribute dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); } diff --git a/clang/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp index 18dcac555f740..07e7704dffded 100644 --- a/clang/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp @@ -54,7 +54,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp distribute parallel for dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); } diff --git 
a/clang/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp index 63f8cfe917cd4..ed7b19111dee8 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp @@ -55,7 +55,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); } diff --git a/clang/test/OpenMP/distribute_simd_dist_schedule_messages.cpp b/clang/test/OpenMP/distribute_simd_dist_schedule_messages.cpp index a6593cfbe0d3d..794681c02646c 100644 --- a/clang/test/OpenMP/distribute_simd_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/distribute_simd_dist_schedule_messages.cpp @@ -63,7 +63,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp distribute simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); } diff --git a/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp index f829874864daa..7acb2587f976f 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp @@ -46,7 +46,7 @@ T tmain(T argc, S **argv) { #pragma omp target parallel for simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+1 1+{{integral constant expression}} expected-note@+1 0+{{constant expression}} + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd collapse (j=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target parallel for simd collapse (1) diff --git a/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp index 972aa5753e363..8dd7f68c25fd8 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp @@ -56,7 +56,7 @@ T tmain(T argc, S **argv) { for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +// expected-error@+1 {{integral constant expression}} 
expected-note@+1 0+{{constant expression}} #pragma omp target parallel for simd ordered(j = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; diff --git a/clang/test/OpenMP/target_simd_collapse_messages.cpp b/clang/test/OpenMP/target_simd_collapse_messages.cpp index d8b0a91f97d7b..00fa3c85279f2 100644 --- a/clang/test/OpenMP/target_simd_collapse_messages.cpp +++ b/clang/test/OpenMP/target_simd_collapse_messages.cpp @@ -44,7 +44,7 @@ T tmain(T argc, S **argv) { #pragma omp target simd collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; - // expected-error@+1 1+{{integral constant expression}} expected-note@+1 0+{{constant expression}} + // expected-error@+1 {{integral constant expression}} expected-note@+1 0+{{constant expression}} #pragma omp target simd collapse (j=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; #pragma omp target simd collapse (1) diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp index e31df97ba31c9..69c1e55eeaa3b 100644 --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp @@ -45,7 +45,7 @@ T tmain(T argc) { #pragma omp target teams distribute dist_schedule (static, S1) // expected-error {{'S1' does not refer to a value}} for (int i = 0; i < 10; ++i) foo(); -#pragma omp target teams distribute dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp target teams distribute dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp index 4f3c581254469..a0efad18668e9 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp @@ -45,7 +45,7 @@ T tmain(T argc) { #pragma omp target teams distribute parallel for dist_schedule (static, S1) // expected-error {{'S1' does not refer to a value}} for (int i = 0; i < 10; ++i) foo(); -#pragma omp target teams distribute parallel for dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp target teams distribute parallel for dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp index 8b272d4358f60..ec634c8ac01c9 
100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp @@ -45,7 +45,7 @@ T tmain(T argc) { #pragma omp target teams distribute parallel for simd dist_schedule (static, S1) // expected-error {{'S1' does not refer to a value}} for (int i = 0; i < 10; ++i) foo(); -#pragma omp target teams distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp target teams distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp index b583c14831e70..507ddabd2fc20 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp @@ -45,7 +45,7 @@ T tmain(T argc) { #pragma omp target teams distribute simd dist_schedule (static, S1) // expected-error {{'S1' does not refer to a value}} for (int i = 0; i < 10; ++i) foo(); -#pragma omp target teams distribute simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp target teams distribute simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/OpenMP/target_update_from_messages.cpp b/clang/test/OpenMP/target_update_from_messages.cpp index 42ecc2814e125..3dc377c4ca4ce 100644 --- a/clang/test/OpenMP/target_update_from_messages.cpp +++ b/clang/test/OpenMP/target_update_from_messages.cpp @@ -131,7 +131,7 @@ T tmain(T argc) { #pragma omp target update from(x, s7.s6[:5].aa[6]) // expected-error {{OpenMP array section is not allowed here}} #pragma omp target update from(x, s7.s6[:5].aa[:6]) // expected-error {{OpenMP array section is not allowed here}} #pragma omp target update from(s7.p[:10]) -#pragma omp target update from(x, s7.bfa) // expected-error 2{{bit fields cannot be used to specify storage in a 'from' clause}} +#pragma omp target update from(x, s7.bfa) // expected-error {{bit fields cannot be used to specify storage in a 'from' clause}} #pragma omp target update from(x, s7.p[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}} #pragma omp target data map(to: s7.i) { diff --git a/clang/test/OpenMP/target_update_to_messages.cpp b/clang/test/OpenMP/target_update_to_messages.cpp index 941c781119e6e..fca4e21304fce 100644 --- a/clang/test/OpenMP/target_update_to_messages.cpp +++ b/clang/test/OpenMP/target_update_to_messages.cpp @@ -138,7 +138,7 @@ T tmain(T argc) { #pragma omp target update to(x, s7.s6[:5].aa[6]) // expected-error {{OpenMP array section is not allowed here}} #pragma omp target update to(x, s7.s6[:5].aa[:6]) // expected-error 
{{OpenMP array section is not allowed here}} #pragma omp target update to(s7.p[:10]) -#pragma omp target update to(x, s7.bfa) // expected-error 2{{bit fields cannot be used to specify storage in a 'to' clause}} +#pragma omp target update to(x, s7.bfa) // expected-error {{bit fields cannot be used to specify storage in a 'to' clause}} #pragma omp target update to(x, s7.p[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}} #pragma omp target data map(to: s7.i) { diff --git a/clang/test/OpenMP/task_messages.cpp b/clang/test/OpenMP/task_messages.cpp index 2f9ee9a44402e..13cbfb6c45693 100644 --- a/clang/test/OpenMP/task_messages.cpp +++ b/clang/test/OpenMP/task_messages.cpp @@ -156,11 +156,11 @@ int foo() { #pragma omp task detach(a) // omp45-error {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} omp50-error {{expected variable of the 'omp_event_handle_t' type, not 'int'}} omp50-error {{expected variable of the 'omp_event_handle_t' type, not 'S'}} ; #pragma omp task detach(evt) detach(evt) // omp45-error 2 {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} expected-error {{directive '#pragma omp task' cannot contain more than one 'detach' clause}} -#pragma omp task detach(cevt) detach(revt) // omp45-error 2 {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} expected-error {{directive '#pragma omp task' cannot contain more than one 'detach' clause}} +#pragma omp task detach(cevt) detach(revt) // omp45-error 2 {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} expected-error {{directive '#pragma omp task' cannot contain more than one 'detach' clause}} omp50-error {{expected variable of the 'omp_event_handle_t' type, not 'const omp_event_handle_t' (aka 'const unsigned long')}} omp50-error {{expected variable of the 'omp_event_handle_t' type, not 'omp_event_handle_t &' (aka 'unsigned long &')}} #pragma omp task detach(evt) mergeable // omp45-error {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} omp50-error {{'mergeable' and 'detach' clause are mutually exclusive and may not appear on the same directive}} omp50-note {{'detach' clause is specified here}} ; #pragma omp task mergeable detach(evt) // omp45-error {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} omp50-error {{'detach' and 'mergeable' clause are mutually exclusive and may not appear on the same directive}} omp50-note {{'mergeable' clause is specified here}} -#pragma omp task detach(-evt) // omp45-error {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} +#pragma omp task detach(-evt) // omp45-error {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} omp50-error {{expected variable of the 'omp_event_handle_t' type}} ; #pragma omp task detach(evt) shared(evt) // omp45-error {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} #pragma omp task detach(evt) firstprivate(evt) // omp45-error {{unexpected OpenMP clause 'detach' in directive '#pragma omp task'}} diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_messages.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_messages.cpp index bd1aaa5c62896..22d2408d3f178 100644 --- a/clang/test/OpenMP/teams_distribute_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_dist_schedule_messages.cpp @@ -55,7 +55,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp target -#pragma omp teams distribute dist_schedule 
(static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp teams distribute dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp index a70d80ad1251e..27ff4125daf7f 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp @@ -55,7 +55,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp target -#pragma omp teams distribute parallel for dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp teams distribute parallel for dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp index b87301fa98d8a..cbd4ec4ce9793 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp @@ -55,7 +55,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp target -#pragma omp teams distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp teams distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp index 6e653fae08c81..424797576837f 100644 --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp @@ -55,7 +55,7 @@ T tmain(T argc) { for (int i = 0; i < 10; ++i) foo(); #pragma omp target -#pragma omp teams distribute simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2{{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp teams distribute simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} for (int i = 0; i < 10; ++i) foo(); return T(); diff --git a/clang/test/SemaCXX/warn-unused-lambda-capture.cpp b/clang/test/SemaCXX/warn-unused-lambda-capture.cpp index 
764a4a42a0846..52ec390b0bba6 100644 --- a/clang/test/SemaCXX/warn-unused-lambda-capture.cpp +++ b/clang/test/SemaCXX/warn-unused-lambda-capture.cpp @@ -147,7 +147,7 @@ void test_templated() { auto explicit_by_value_unused = [i] {}; // expected-warning{{lambda capture 'i' is not used}} auto explicit_by_value_unused_sizeof = [i] { return sizeof(i); }; // expected-warning{{lambda capture 'i' is not required to be captured for this use}} - auto explicit_by_value_unused_decltype = [i] { decltype(i) j = 0; }; // expected-warning{{lambda capture 'i' is not required to be captured for this use}} + auto explicit_by_value_unused_decltype = [i] { decltype(i) j = 0; }; // expected-warning{{lambda capture 'i' is not used}} auto explicit_by_value_unused_const = [k] { return k + 1; }; // expected-warning{{lambda capture 'k' is not required to be captured for this use}} auto explicit_by_value_unused_const_generic = [k](auto c) { return k + 1; }; // expected-warning{{lambda capture 'k' is not required to be captured for this use}} diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx17.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp similarity index 98% rename from clang/test/SemaTemplate/temp_arg_nontype_cxx17.cpp rename to clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp index 52cf51719f05a..675f957ef6fad 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx17.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp @@ -503,13 +503,3 @@ namespace PR48517 { template<> struct Q<&R::n> { static constexpr int X = 1; }; static_assert(R().f() == 1); } - -namespace dependent_reference { - template struct S { int *q = &r; }; - template auto f() { static int n; return S(); } - auto v = f<0>(); - auto w = f<1>(); - static_assert(!is_same); - // Ensure that we can instantiate the definition of S<...>. 
- int n = *v.q + *w.q; -} diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp index d514465f7d677..c42fda7804305 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp @@ -292,47 +292,3 @@ namespace Predefined { Y(); // expected-error {{reference to subobject of predefined '__func__' variable}} } } - -namespace dependent { - template struct R { static inline auto &v = V; }; - template constexpr bool operator==(R, R) { return &V == &W; } - template struct S { static inline auto *v = V; }; - template constexpr bool operator==(S, S) { return V == W; } - template struct T { static inline const auto &v = V; }; - template constexpr bool operator==(T, T) { return &V == &W; } - template struct V { T v; }; - template auto f() { - static int n; - static V vn; - if constexpr (N < 10) - return R(); - else if constexpr (N < 20) - return R(); // FIXME: expected-error 2{{refers to subobject}} - else if constexpr (N < 30) - return S<&n>(); - else if constexpr (N < 40) - return S<&vn.v>(); // FIXME: expected-error 2{{refers to subobject}} - else if constexpr (N < 50) - return T{n}>(); - else if constexpr (N < 60) - return T{&n}>(); - else if constexpr (N < 70) - return T{vn.v}>(); - else if constexpr (N < 80) - return T{&vn.v}>(); - } - template void check() { - auto v = f(); // FIXME: expected-note 2{{instantiation of}} - auto w = f(); // FIXME: expected-note 2{{instantiation of}} - static_assert(!__is_same(decltype(v), decltype(w))); - static_assert(v != w); - } - template void check<0>(); - template void check<10>(); // FIXME: expected-note 2{{instantiation of}} - template void check<20>(); - template void check<30>(); // FIXME: expected-note 2{{instantiation of}} - template void check<40>(); - template void check<50>(); - template void check<60>(); - template void check<70>(); -} From 34e72a146111dd986889a0f0ec8767b2ca6b2913 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 10:12:44 -0800 Subject: [PATCH 115/378] Revert "DR2064: decltype(E) is only a dependent type if E is type-dependent, not" This reverts commit 638867afd4bce4a2c56dea041299428af3727d61. This is part of 5 commits being reverted due to https://crbug.com/1161059. See bug for repro. --- clang/include/clang/AST/DependenceFlags.h | 6 ------ clang/lib/AST/ASTContext.cpp | 8 ++++---- clang/lib/AST/ItaniumMangle.cpp | 5 ----- clang/lib/AST/Type.cpp | 20 +++++++++++++------ clang/test/CXX/drs/dr20xx.cpp | 12 ----------- clang/test/Sema/invalid-bitwidth-expr.mm | 1 - .../invalid-template-base-specifier.cpp | 4 ++-- clang/test/SemaTemplate/dependent-expr.cpp | 7 +------ .../SemaTemplate/temp_arg_template_cxx1z.cpp | 8 +------- clang/www/cxx_dr_status.html | 2 +- 10 files changed, 23 insertions(+), 50 deletions(-) diff --git a/clang/include/clang/AST/DependenceFlags.h b/clang/include/clang/AST/DependenceFlags.h index 8c47047a7526d..ca96b65574bdd 100644 --- a/clang/include/clang/AST/DependenceFlags.h +++ b/clang/include/clang/AST/DependenceFlags.h @@ -255,12 +255,6 @@ inline TypeDependence toTypeDependence(TemplateNameDependence D) { inline TypeDependence toTypeDependence(TemplateArgumentDependence D) { return Dependence(D).type(); } -/// Compute the dependence of a type that depends on the type of an expression, -/// given the dependence of that expression and of its type. 
-inline TypeDependence typeToTypeDependence(ExprDependence ED, TypeDependence TD) { - return Dependence(ED & ~ExprDependence::Value).type() | - (TD & TypeDependence::VariablyModified); -} inline NestedNameSpecifierDependence toNestedNameSpecifierDependendence(TypeDependence D) { diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 0190573fe36e2..44545f00b146a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -5383,10 +5383,10 @@ QualType ASTContext::getDecltypeType(Expr *e, QualType UnderlyingType) const { DecltypeType *dt; // C++11 [temp.type]p2: - // If an expression e is type-dependent, decltype(e) denotes a unique - // dependent type. Two such decltype-specifiers refer to the same type only - // if their expressions are equivalent (14.5.6.1). - if (e->isTypeDependent()) { + // If an expression e involves a template parameter, decltype(e) denotes a + // unique dependent type. Two such decltype-specifiers refer to the same + // type only if their expressions are equivalent (14.5.6.1). + if (e->isInstantiationDependent()) { llvm::FoldingSetNodeID ID; DependentDecltypeType::Profile(ID, *this, e); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 01deb598a0781..6c8d5687c64a8 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2582,11 +2582,6 @@ void CXXNameMangler::mangleType(QualType T) { // instantation-dependent qualifiers. See // https://github.com/itanium-cxx-abi/cxx-abi/issues/114. - // Don't desugar instantiation-dependent decltype / typeof types. We need - // to mangle the expression as written. - if (isa(T)) - break; - QualType Desugared = T.getSingleStepDesugaredType(Context.getASTContext()); if (Desugared == T) diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 5dec80be9ccbf..034e175f13524 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -125,7 +125,8 @@ ArrayType::ArrayType(TypeClass tc, QualType et, QualType can, // template int arr[] = {N...}; : Type(tc, can, et->getDependence() | - (sz ? toTypeDependence(sz->getDependence()) + (sz ? toTypeDependence( + turnValueToTypeDependence(sz->getDependence())) : TypeDependence::None) | (tc == VariableArray ? TypeDependence::VariablyModified : TypeDependence::None) | @@ -3395,8 +3396,9 @@ QualType MacroQualifiedType::getModifiedType() const { TypeOfExprType::TypeOfExprType(Expr *E, QualType can) : Type(TypeOfExpr, can, - typeToTypeDependence(E->getDependence(), - E->getType()->getDependence())), + toTypeDependence(E->getDependence()) | + (E->getType()->getDependence() & + TypeDependence::VariablyModified)), TOExpr(E) {} bool TypeOfExprType::isSugared() const { @@ -3416,12 +3418,18 @@ void DependentTypeOfExprType::Profile(llvm::FoldingSetNodeID &ID, } DecltypeType::DecltypeType(Expr *E, QualType underlyingType, QualType can) + // C++11 [temp.type]p2: "If an expression e involves a template parameter, + // decltype(e) denotes a unique dependent type." Hence a decltype type is + // type-dependent even if its expression is only instantiation-dependent. : Type(Decltype, can, - typeToTypeDependence(E->getDependence(), - E->getType()->getDependence())), + toTypeDependence(E->getDependence()) | + (E->isInstantiationDependent() ? 
TypeDependence::Dependent + : TypeDependence::None) | + (E->getType()->getDependence() & + TypeDependence::VariablyModified)), E(E), UnderlyingType(underlyingType) {} -bool DecltypeType::isSugared() const { return !E->isTypeDependent(); } +bool DecltypeType::isSugared() const { return !E->isInstantiationDependent(); } QualType DecltypeType::desugar() const { if (isSugared()) diff --git a/clang/test/CXX/drs/dr20xx.cpp b/clang/test/CXX/drs/dr20xx.cpp index 6e1c0505a5ecc..56cc1161a00c8 100644 --- a/clang/test/CXX/drs/dr20xx.cpp +++ b/clang/test/CXX/drs/dr20xx.cpp @@ -49,18 +49,6 @@ namespace dr2026 { // dr2026: 11 } } -namespace dr2064 { // dr2064: 12 -#if __cplusplus >= 201103L - template struct X { - template struct Y {}; - }; - template void f() { - X::Y y; // ok - return X::f(); // expected-error {{no member named 'f' in 'dr2064::X= 201103L diff --git a/clang/test/Sema/invalid-bitwidth-expr.mm b/clang/test/Sema/invalid-bitwidth-expr.mm index 8ce498feb4aff..41ca9496de4f9 100644 --- a/clang/test/Sema/invalid-bitwidth-expr.mm +++ b/clang/test/Sema/invalid-bitwidth-expr.mm @@ -26,7 +26,6 @@ auto func() { auto func() { // error-bit should be propagated from TemplateArgument to NestNameSpecifier. class Base::type C; // expected-error {{no matching function for call to 'Foo'}} - // expected-error@-1 {{no class named 'type' in 'Base'}} return C; } struct Z { diff --git a/clang/test/SemaCXX/invalid-template-base-specifier.cpp b/clang/test/SemaCXX/invalid-template-base-specifier.cpp index 77601402a85ca..7a1a7f801c45e 100644 --- a/clang/test/SemaCXX/invalid-template-base-specifier.cpp +++ b/clang/test/SemaCXX/invalid-template-base-specifier.cpp @@ -12,11 +12,11 @@ void test() { Crash(); } // expected-note {{in instantiation of template cl template using Alias = decltype(Foo(T())); // expected-error {{no matching function for call to 'Foo'}} template -struct Crash2 : decltype(Alias()) { // expected-note {{in instantiation of template type alias 'Alias' requested here}} expected-error {{base specifier must name a class}} +struct Crash2 : decltype(Alias()) { // expected-note {{in instantiation of template type alias 'Alias' requested here}} Crash2(){}; }; -void test2() { Crash2(); } // expected-note 2{{in instantiation of template class 'Crash2' requested here}} +void test2() { Crash2(); } // expected-note {{in instantiation of template class 'Crash2' requested here}} template class Base {}; diff --git a/clang/test/SemaTemplate/dependent-expr.cpp b/clang/test/SemaTemplate/dependent-expr.cpp index dace7e28788d2..abdb8e9c4a9fd 100644 --- a/clang/test/SemaTemplate/dependent-expr.cpp +++ b/clang/test/SemaTemplate/dependent-expr.cpp @@ -129,12 +129,7 @@ namespace PR45083 { template void f() { decltype(({})) x; // expected-error {{incomplete type}} } - template void f(); - - template void f2() { - decltype(({T();})) x; // expected-error {{incomplete type}} - } - template void f2(); // expected-note {{instantiation of}} + template void f(); // expected-note {{instantiation of}} template auto g() { auto c = [](auto, int) -> decltype(({})) {}; diff --git a/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp b/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp index b9a1c933560d2..03ef78f8cf14e 100644 --- a/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp +++ b/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp @@ -115,12 +115,6 @@ namespace Auto { int n; template struct SubstFailure; - TInt isf; // expected-error {{template template argument has different template parameters than its corresponding template 
template parameter}} + TInt isf; // FIXME: this should be ill-formed TIntPtr ipsf; - - template typename C> struct TAutoAutoFirst {}; - template struct AutoAutoFirst; - template struct AutoAutoSecond; - TAutoAutoFirst aaf; - TAutoAutoFirst aas; // FIXME: this should be rejected due to parameter mismatch } diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 9be6f1262b681..57093c1cf5b0c 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -12198,7 +12198,7 @@


2064 CD4 Conflicting specifications for dependent decltype-specifiers - Clang 12 + Unknown 2065 From 1aa10ab2e1ddc863a944fb181b2b8ed633864bab Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Tue, 22 Dec 2020 23:46:51 +0530 Subject: [PATCH 116/378] Revert "[Flang][openmp][5.0] Add task_reduction clause." This reverts commit 9a7895dc20852b662a66976d06871ec2a0b968c8. Reverting due to missing Co-author attribution. https://reviews.llvm.org/D93105 --- flang/include/flang/Parser/parse-tree.h | 2 +- flang/lib/Parser/openmp-parsers.cpp | 5 +---- flang/lib/Parser/unparse.cpp | 2 +- flang/lib/Semantics/check-omp-structure.cpp | 1 - flang/lib/Semantics/check-omp-structure.h | 1 - flang/test/Semantics/omp-clause-validity01.f90 | 6 ++---- llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 - 7 files changed, 5 insertions(+), 13 deletions(-) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 119a92bee2116..7e258b668576e 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3415,7 +3415,7 @@ struct OmpReductionOperator { // variable-name-list) struct OmpReductionClause { TUPLE_CLASS_BOILERPLATE(OmpReductionClause); - std::tuple t; + std::tuple> t; }; // OMP 5.0 2.11.4 allocate-clause -> ALLOCATE ([allocator:] variable-name-list) diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 3a0d28cd9c12f..1386b2b16a788 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -102,7 +102,7 @@ TYPE_PARSER(construct(Parser{}) || construct(Parser{})) TYPE_PARSER(construct( - Parser{} / ":", Parser{})) + Parser{} / ":", nonemptyList(designator))) // OMP 5.0 2.11.4 ALLOCATE ([allocator:] variable-name-list) TYPE_PARSER(construct( @@ -220,9 +220,6 @@ TYPE_PARSER( parenthesized(Parser{}))) || "REDUCTION" >> construct(parenthesized(Parser{})) || - "TASK_REDUCTION" >> - construct(construct( - parenthesized(Parser{}))) || "RELAXED" >> construct(construct()) || "RELEASE" >> construct(construct()) || "SAFELEN" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ba54a0a84fa73..fdb694f3d26f5 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2016,7 +2016,7 @@ class UnparseVisitor { Word("REDUCTION("); Walk(std::get(x.t)); Put(":"); - Walk(std::get(x.t)); + Walk(std::get>(x.t), ","); Put(")"); } void Unparse(const OmpAllocateClause &x) { diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index a144c7a2b57b9..e2c8333ce7ee4 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -419,7 +419,6 @@ CHECK_SIMPLE_CLAUSE(Mergeable, OMPC_mergeable) CHECK_SIMPLE_CLAUSE(Nogroup, OMPC_nogroup) CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) CHECK_SIMPLE_CLAUSE(Nowait, OMPC_nowait) -CHECK_SIMPLE_CLAUSE(TaskReduction, OMPC_task_reduction) CHECK_SIMPLE_CLAUSE(To, OMPC_to) CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform) CHECK_SIMPLE_CLAUSE(Untied, OMPC_untied) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index ccd0e08a8c08a..a966eaf8c4a7d 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -155,7 +155,6 @@ class OmpStructureChecker void Enter(const parser::OmpClause::Safelen &); void Enter(const parser::OmpClause::Shared &); void Enter(const parser::OmpClause::Simdlen &); - void 
Enter(const parser::OmpClause::TaskReduction &); void Enter(const parser::OmpClause::ThreadLimit &); void Enter(const parser::OmpClause::To &); void Enter(const parser::OmpClause::Link &); diff --git a/flang/test/Semantics/omp-clause-validity01.f90 b/flang/test/Semantics/omp-clause-validity01.f90 index 1d689ea916996..3f53451378663 100644 --- a/flang/test/Semantics/omp-clause-validity01.f90 +++ b/flang/test/Semantics/omp-clause-validity01.f90 @@ -349,8 +349,7 @@ ! collapse-clause a = 0.0 - !ERROR: TASK_REDUCTION clause is not allowed on the SIMD directive - !$omp simd private(b) reduction(+:a) task_reduction(+:a) + !$omp simd private(b) reduction(+:a) do i = 1, N a = a + b + 3.14 enddo @@ -450,8 +449,7 @@ enddo !ERROR: At most one NUM_TASKS clause can appear on the TASKLOOP directive - !ERROR: TASK_REDUCTION clause is not allowed on the TASKLOOP directive - !$omp taskloop num_tasks(3) num_tasks(2) task_reduction(*:a) + !$omp taskloop num_tasks(3) num_tasks(2) do i = 1,N a = 3.14 enddo diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 9fd14cb03a475..fa67a64fa9970 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -231,7 +231,6 @@ def OMPC_IsDevicePtr : Clause<"is_device_ptr"> { } def OMPC_TaskReduction : Clause<"task_reduction"> { let clangClass = "OMPTaskReductionClause"; - let flangClassValue = "OmpReductionClause"; } def OMPC_InReduction : Clause<"in_reduction"> { let clangClass = "OMPInReductionClause"; From b2e734d5f46d70c5a73dd16b0619c58eff6b8052 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 10:31:21 -0800 Subject: [PATCH 117/378] Revert "[clangd] zap a few warnings" This reverts commit 95c7b6cadbc9a3d4376ef44edbeb3c8bb5b8d7fc. Depends on a reverted change. --- clang-tools-extra/clangd/DumpAST.cpp | 1 - clang-tools-extra/clangd/FindTarget.cpp | 1 - clang-tools-extra/clangd/index/remote/Client.cpp | 3 +-- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/DumpAST.cpp b/clang-tools-extra/clangd/DumpAST.cpp index 8f1b3f3a1aae5..12698b42ef3e6 100644 --- a/clang-tools-extra/clangd/DumpAST.cpp +++ b/clang-tools-extra/clangd/DumpAST.cpp @@ -143,7 +143,6 @@ class DumpVisitor : public RecursiveASTVisitor { TEMPLATE_ARGUMENT_KIND(Declaration); TEMPLATE_ARGUMENT_KIND(Template); TEMPLATE_ARGUMENT_KIND(TemplateExpansion); - TEMPLATE_ARGUMENT_KIND(UncommonValue); #undef TEMPLATE_ARGUMENT_KIND } llvm_unreachable("Unhandled ArgKind enum"); diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index c5c7d71be6618..3afd655226805 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -1069,7 +1069,6 @@ class ExplicitReferenceCollector case TemplateArgument::Pack: case TemplateArgument::Type: case TemplateArgument::Expression: - case TemplateArgument::UncommonValue: break; // Handled by VisitType and VisitExpression. 
}; return RecursiveASTVisitor::TraverseTemplateArgumentLoc(A); diff --git a/clang-tools-extra/clangd/index/remote/Client.cpp b/clang-tools-extra/clangd/index/remote/Client.cpp index a153a8812baff..b09dbf915e462 100644 --- a/clang-tools-extra/clangd/index/remote/Client.cpp +++ b/clang-tools-extra/clangd/index/remote/Client.cpp @@ -152,8 +152,7 @@ class IndexClient : public clangd::SymbolIndex { }); } - llvm::unique_function - indexedFiles() const override { + llvm::unique_function indexedFiles() const { // FIXME: For now we always return "false" regardless of whether the file // was indexed or not. A possible implementation could be based on // the idea that we do not want to send a request at every From 6283d2aa51985d6e6f3404f4b0a3b38b5b05ee6e Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 10:33:09 -0800 Subject: [PATCH 118/378] Revert "[LLDB] Unbreak the build after recent clang changes" This reverts commit 430d5d8429473c2b10b109991d7577a3cea41140. Depends on a reverted change. --- lldb/include/lldb/lldb-enumerations.h | 1 - lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 3 --- 2 files changed, 4 deletions(-) diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index ff4e15e7e070b..2679ee52136dc 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -816,7 +816,6 @@ enum TemplateArgumentKind { eTemplateArgumentKindExpression, eTemplateArgumentKindPack, eTemplateArgumentKindNullPtr, - eTemplateArgumentKindUncommonValue, }; /// Options that can be set for a formatter to alter its behavior. Not diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 4f55cf7cfa79a..c15b15e736fbe 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -7002,9 +7002,6 @@ TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, case clang::TemplateArgument::Pack: return eTemplateArgumentKindPack; - - case clang::TemplateArgument::UncommonValue: - return eTemplateArgumentKindUncommonValue; } llvm_unreachable("Unhandled clang::TemplateArgument::ArgKind"); } From a5311d731e1b95e93b35b1e9183a4a531df386e7 Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Tue, 27 Oct 2020 23:24:20 +0000 Subject: [PATCH 119/378] [clang-tidy] Handle template instantiations in container size check readability-container-size-empty currently modifies source code based on AST nodes in template instantiations, which means that it makes transformations based on substituted types. This can lead to transforming code to be broken. Change the matcher implementation to ignore template instantiations explicitly, and add a matcher to explicitly handle template declarations instead of instantiations. 
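For illustration only, a minimal sketch of the situation the check must now leave alone (the names OnlySize and useSize are invented for this example and are not part of the change; the added test exercises the same idea with TypeWithSize):

  // A container-like type that provides size() but deliberately no empty().
  struct OnlySize {
    unsigned size() const { return 0; }
  };

  template <typename T>
  void useSize(const T &c) {
    // Rewriting this to !c.empty() based on a std::vector<int> instantiation
    // would not compile for useSize<OnlySize>, so any fix-it must be driven
    // by the template declaration rather than by one instantiation.
    if (c.size())
      ;
  }

With this change the dependent member call is matched on the template declaration via cxxDependentScopeMemberExpr, and matches inside template instantiations are skipped.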
Differential Revision: https://reviews.llvm.org/D91302 --- .../readability/ContainerSizeEmptyCheck.cpp | 183 ++++++++++++--- .../readability-container-size-empty.cpp | 216 ++++++++++++++++++ 2 files changed, 364 insertions(+), 35 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp index 14a38c09ad9b0..e7c5f0ab05be9 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp @@ -16,6 +16,77 @@ using namespace clang::ast_matchers; namespace clang { +namespace ast_matchers { +AST_POLYMORPHIC_MATCHER_P2(hasAnyArgumentWithParam, + AST_POLYMORPHIC_SUPPORTED_TYPES(CallExpr, + CXXConstructExpr), + internal::Matcher, ArgMatcher, + internal::Matcher, ParamMatcher) { + BoundNodesTreeBuilder Result; + // The first argument of an overloaded member operator is the implicit object + // argument of the method which should not be matched against a parameter, so + // we skip over it here. + BoundNodesTreeBuilder Matches; + unsigned ArgIndex = cxxOperatorCallExpr(callee(cxxMethodDecl())) + .matches(Node, Finder, &Matches) + ? 1 + : 0; + int ParamIndex = 0; + for (; ArgIndex < Node.getNumArgs(); ++ArgIndex) { + BoundNodesTreeBuilder ArgMatches(*Builder); + if (ArgMatcher.matches(*(Node.getArg(ArgIndex)->IgnoreParenCasts()), Finder, + &ArgMatches)) { + BoundNodesTreeBuilder ParamMatches(ArgMatches); + if (expr(anyOf(cxxConstructExpr(hasDeclaration(cxxConstructorDecl( + hasParameter(ParamIndex, ParamMatcher)))), + callExpr(callee(functionDecl( + hasParameter(ParamIndex, ParamMatcher)))))) + .matches(Node, Finder, &ParamMatches)) { + Result.addMatch(ParamMatches); + *Builder = std::move(Result); + return true; + } + } + ++ParamIndex; + } + return false; +} + +AST_MATCHER(Expr, usedInBooleanContext) { + const char *ExprName = "__booleanContextExpr"; + auto Result = + expr(expr().bind(ExprName), + anyOf(hasParent(varDecl(hasType(booleanType()))), + hasParent(cxxConstructorDecl( + hasAnyConstructorInitializer(cxxCtorInitializer( + withInitializer(expr(equalsBoundNode(ExprName))), + forField(hasType(booleanType())))))), + hasParent(fieldDecl(hasType(booleanType()))), + hasParent(stmt(anyOf( + explicitCastExpr(hasDestinationType(booleanType())), + ifStmt(hasCondition(expr(equalsBoundNode(ExprName)))), + doStmt(hasCondition(expr(equalsBoundNode(ExprName)))), + whileStmt(hasCondition(expr(equalsBoundNode(ExprName)))), + forStmt(hasCondition(expr(equalsBoundNode(ExprName)))), + conditionalOperator( + hasCondition(expr(equalsBoundNode(ExprName)))), + parenListExpr(hasParent(varDecl(hasType(booleanType())))), + parenExpr(hasParent( + explicitCastExpr(hasDestinationType(booleanType())))), + returnStmt(forFunction(returns(booleanType()))), + cxxUnresolvedConstructExpr(hasType(booleanType())), + callExpr(hasAnyArgumentWithParam( + expr(equalsBoundNode(ExprName)), + parmVarDecl(hasType(booleanType())))), + binaryOperator(hasAnyOperatorName("&&", "||")), + unaryOperator(hasOperatorName("!")).bind("NegOnSize")))))) + .matches(Node, Finder, Builder); + Builder->removeBindings([ExprName](const BoundNodesMap &Nodes) { + return Nodes.getNode(ExprName).getNodeKind().isNone(); + }); + return Result; +} +} // namespace ast_matchers namespace tidy { namespace readability { @@ -26,18 +97,27 @@ ContainerSizeEmptyCheck::ContainerSizeEmptyCheck(StringRef Name, : ClangTidyCheck(Name, Context) {} void 
ContainerSizeEmptyCheck::registerMatchers(MatchFinder *Finder) { - const auto ValidContainer = qualType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(cxxRecordDecl(isSameOrDerivedFrom( - namedDecl( - has(cxxMethodDecl( - isConst(), parameterCountIs(0), isPublic(), - hasName("size"), - returns(qualType(isInteger(), unless(booleanType())))) - .bind("size")), - has(cxxMethodDecl(isConst(), parameterCountIs(0), isPublic(), - hasName("empty"), returns(booleanType())) - .bind("empty"))) - .bind("container"))))))); + const auto ValidContainerRecord = cxxRecordDecl(isSameOrDerivedFrom( + namedDecl( + has(cxxMethodDecl(isConst(), parameterCountIs(0), isPublic(), + hasName("size"), + returns(qualType(isInteger(), unless(booleanType()), + unless(elaboratedType())))) + .bind("size")), + has(cxxMethodDecl(isConst(), parameterCountIs(0), isPublic(), + hasName("empty"), returns(booleanType())) + .bind("empty"))) + .bind("container"))); + + const auto ValidContainerNonTemplateType = + qualType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(ValidContainerRecord)))); + const auto ValidContainerTemplateType = + qualType(hasUnqualifiedDesugaredType(templateSpecializationType( + hasDeclaration(classTemplateDecl(has(ValidContainerRecord)))))); + + const auto ValidContainer = qualType( + anyOf(ValidContainerNonTemplateType, ValidContainerTemplateType)); const auto WrongUse = traverse( TK_AsIs, @@ -52,18 +132,34 @@ void ContainerSizeEmptyCheck::registerMatchers(MatchFinder *Finder) { anyOf(hasParent( unaryOperator(hasOperatorName("!")).bind("NegOnSize")), anything()))), - hasParent(explicitCastExpr(hasDestinationType(booleanType()))))); + usedInBooleanContext())); Finder->addMatcher( - cxxMemberCallExpr(on(expr(anyOf(hasType(ValidContainer), + cxxMemberCallExpr(unless(isInTemplateInstantiation()), + on(expr(anyOf(hasType(ValidContainer), hasType(pointsTo(ValidContainer)), - hasType(references(ValidContainer))))), + hasType(references(ValidContainer)))) + .bind("MemberCallObject")), callee(cxxMethodDecl(hasName("size"))), WrongUse, unless(hasAncestor(cxxMethodDecl( ofClass(equalsBoundNode("container")))))) .bind("SizeCallExpr"), this); + Finder->addMatcher( + callExpr(has(cxxDependentScopeMemberExpr( + hasObjectExpression( + expr(anyOf(hasType(ValidContainer), + hasType(pointsTo(ValidContainer)), + hasType(references(ValidContainer)))) + .bind("MemberCallObject")), + hasMemberName("size"))), + WrongUse, + unless(hasAncestor( + cxxMethodDecl(ofClass(equalsBoundNode("container")))))) + .bind("SizeCallExpr"), + this); + // Empty constructor matcher. const auto DefaultConstructor = cxxConstructExpr( hasDeclaration(cxxConstructorDecl(isDefaultConstructor()))); @@ -72,12 +168,11 @@ void ContainerSizeEmptyCheck::registerMatchers(MatchFinder *Finder) { ignoringImpCasts(stringLiteral(hasSize(0))), ignoringImpCasts(cxxBindTemporaryExpr(has(DefaultConstructor))), ignoringImplicit(DefaultConstructor), - cxxConstructExpr( - hasDeclaration(cxxConstructorDecl(isCopyConstructor())), - has(expr(ignoringImpCasts(DefaultConstructor)))), - cxxConstructExpr( - hasDeclaration(cxxConstructorDecl(isMoveConstructor())), - has(expr(ignoringImpCasts(DefaultConstructor))))); + cxxConstructExpr(hasDeclaration(cxxConstructorDecl(isCopyConstructor())), + has(expr(ignoringImpCasts(DefaultConstructor)))), + cxxConstructExpr(hasDeclaration(cxxConstructorDecl(isMoveConstructor())), + has(expr(ignoringImpCasts(DefaultConstructor)))), + cxxUnresolvedConstructExpr(argumentCountIs(0))); // Match the object being compared. 
const auto STLArg = anyOf(unaryOperator( @@ -87,6 +182,7 @@ void ContainerSizeEmptyCheck::registerMatchers(MatchFinder *Finder) { expr(hasType(ValidContainer)).bind("STLObject")); Finder->addMatcher( cxxOperatorCallExpr( + unless(isInTemplateInstantiation()), hasAnyOverloadedOperatorName("==", "!="), anyOf(allOf(hasArgument(0, WrongComparend), hasArgument(1, STLArg)), allOf(hasArgument(0, STLArg), hasArgument(1, WrongComparend))), @@ -94,24 +190,33 @@ void ContainerSizeEmptyCheck::registerMatchers(MatchFinder *Finder) { cxxMethodDecl(ofClass(equalsBoundNode("container")))))) .bind("BinCmp"), this); + Finder->addMatcher( + binaryOperator(hasAnyOperatorName("==", "!="), + anyOf(allOf(hasLHS(WrongComparend), hasRHS(STLArg)), + allOf(hasLHS(STLArg), hasRHS(WrongComparend))), + unless(hasAncestor( + cxxMethodDecl(ofClass(equalsBoundNode("container")))))) + .bind("BinCmp"), + this); } void ContainerSizeEmptyCheck::check(const MatchFinder::MatchResult &Result) { - const auto *MemberCall = - Result.Nodes.getNodeAs("SizeCallExpr"); + const auto *MemberCall = Result.Nodes.getNodeAs("SizeCallExpr"); + const auto *MemberCallObject = + Result.Nodes.getNodeAs("MemberCallObject"); const auto *BinCmp = Result.Nodes.getNodeAs("BinCmp"); + const auto *BinCmpTempl = Result.Nodes.getNodeAs("BinCmp"); const auto *BinaryOp = Result.Nodes.getNodeAs("SizeBinaryOp"); const auto *Pointee = Result.Nodes.getNodeAs("Pointee"); const auto *E = - MemberCall - ? MemberCall->getImplicitObjectArgument() + MemberCallObject + ? MemberCallObject : (Pointee ? Pointee : Result.Nodes.getNodeAs("STLObject")); FixItHint Hint; std::string ReplacementText = std::string( Lexer::getSourceText(CharSourceRange::getTokenRange(E->getSourceRange()), *Result.SourceManager, getLangOpts())); - if (BinCmp && IsBinaryOrTernary(E)) { - // Not just a DeclRefExpr, so parenthesize to be on the safe side. + if (IsBinaryOrTernary(E) || isa(E)) { ReplacementText = "(" + ReplacementText + ")"; } if (E->getType()->isPointerType()) @@ -125,7 +230,13 @@ void ContainerSizeEmptyCheck::check(const MatchFinder::MatchResult &Result) { } Hint = FixItHint::CreateReplacement(BinCmp->getSourceRange(), ReplacementText); - } else if (BinaryOp) { // Determine the correct transformation. + } else if (BinCmpTempl) { + if (BinCmpTempl->getOpcode() == BinaryOperatorKind::BO_NE) { + ReplacementText = "!" + ReplacementText; + } + Hint = FixItHint::CreateReplacement(BinCmpTempl->getSourceRange(), + ReplacementText); + } else if (BinaryOp) { // Determine the correct transformation. bool Negation = false; const bool ContainerIsLHS = !llvm::isa(BinaryOp->getLHS()->IgnoreImpCasts()); @@ -195,15 +306,17 @@ void ContainerSizeEmptyCheck::check(const MatchFinder::MatchResult &Result) { "!" + ReplacementText); } - if (MemberCall) { - diag(MemberCall->getBeginLoc(), - "the 'empty' method should be used to check " - "for emptiness instead of 'size'") + auto WarnLoc = MemberCall ? MemberCall->getBeginLoc() : SourceLocation{}; + + if (WarnLoc.isValid()) { + diag(WarnLoc, "the 'empty' method should be used to check " + "for emptiness instead of 'size'") << Hint; } else { - diag(BinCmp->getBeginLoc(), - "the 'empty' method should be used to check " - "for emptiness instead of comparing to an empty object") + WarnLoc = BinCmpTempl ? BinCmpTempl->getBeginLoc() + : (BinCmp ? 
BinCmp->getBeginLoc() : SourceLocation{}); + diag(WarnLoc, "the 'empty' method should be used to check " + "for emptiness instead of comparing to an empty object") << Hint; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp index 9100559233e3c..e730b1b221309 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp @@ -96,6 +96,11 @@ class Container4 { bool empty() const { return *this == Container4(); } }; +struct Lazy { + constexpr unsigned size() const { return 0; } + constexpr bool empty() const { return true; } +}; + std::string s_func() { return std::string(); } @@ -440,6 +445,43 @@ bool returnsBool() { // CHECK-FIXES: {{^ }}return !derived.empty(); } +class ConstructWithBoolField { + bool B; +public: + ConstructWithBoolField(const std::vector &C) : B(C.size()) {} +// CHECK-MESSAGES: :[[@LINE-1]]:57: warning: the 'empty' method should be used +// CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here +// CHECK-FIXES: {{^ }}ConstructWithBoolField(const std::vector &C) : B(!C.empty()) {} +}; + +struct StructWithNSDMI { + std::vector C; + bool B = C.size(); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: the 'empty' method should be used +// CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here +// CHECK-FIXES: {{^ }}bool B = !C.empty(); +}; + +int func(const std::vector &C) { + return C.size() ? 0 : 1; +// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: the 'empty' method should be used +// CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here +// CHECK-FIXES: {{^ }}return !C.empty() ? 
0 : 1; +} + +constexpr Lazy L; +static_assert(!L.size(), ""); +// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: the 'empty' method should be used +// CHECK-MESSAGES: :101:18: note: method 'Lazy'::empty() defined here +// CHECK-FIXES: {{^}}static_assert(L.empty(), ""); + +struct StructWithLazyNoexcept { + void func() noexcept(L.size()); +// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: the 'empty' method should be used +// CHECK-MESSAGES: :101:18: note: method 'Lazy'::empty() defined here +// CHECK-FIXES: {{^ }}void func() noexcept(!L.empty()); +}; + #define CHECKSIZE(x) if (x.size()) {} // CHECK-FIXES: #define CHECKSIZE(x) if (x.size()) {} @@ -483,3 +525,177 @@ void g() { f(); f(); } + +template +bool neverInstantiatedTemplate() { + std::vector v; + if (v.size()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (!v.empty()){{$}} + + if (v == std::vector()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used to check for emptiness instead of comparing to an empty object [readability-container-size-empty] + // CHECK-FIXES: {{^ }}if (v.empty()){{$}} + // CHECK-FIXES-NEXT: ; + if (v.size() == 0) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (v.empty()){{$}} + if (v.size() != 0) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (!v.empty()){{$}} + if (v.size() < 1) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (v.empty()){{$}} + if (v.size() > 0) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (!v.empty()){{$}} + if (v.size() == 1) + ; + if (v.size() != 1) + ; + if (v.size() == 2) + ; + if (v.size() != 2) + ; + + if (static_cast(v.size())) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:25: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (static_cast(!v.empty())){{$}} + if (v.size() && false) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (!v.empty() && false){{$}} + if (!v.size()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:8: warning: the 'empty' method should be used to check for emptiness instead of 'size' [readability-container-size-empty] + // CHECK-MESSAGES: :9:8: note: method 'vector'::empty() defined here + // CHECK-FIXES: {{^ }}if (v.empty()){{$}} + + TemplatedContainer 
templated_container; + if (templated_container.size()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}if (!templated_container.empty()){{$}} + if (templated_container != TemplatedContainer()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}if (!templated_container.empty()){{$}} + // CHECK-FIXES-NEXT: ; + while (templated_container.size()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:10: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}while (!templated_container.empty()){{$}} + + do { + } + while (templated_container.size()); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}while (!templated_container.empty()); + + for (; templated_container.size();) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:10: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}for (; !templated_container.empty();){{$}} + + if (true && templated_container.size()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:15: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}if (true && !templated_container.empty()){{$}} + + if (true || templated_container.size()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:15: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}if (true || !templated_container.empty()){{$}} + + if (!templated_container.size()) + ; + // CHECK-MESSAGES: :[[@LINE-2]]:8: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}if (templated_container.empty()){{$}} + + bool b1 = templated_container.size(); + // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}bool b1 = !templated_container.empty(); + + bool b2(templated_container.size()); + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}bool b2(!templated_container.empty()); + + auto b3 = static_cast(templated_container.size()); + // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}auto b3 = static_cast(!templated_container.empty()); + + auto b4 = (bool)templated_container.size(); + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}auto b4 = (bool)!templated_container.empty(); + + auto b5 = bool(templated_container.size()); + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 
'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}auto b5 = bool(!templated_container.empty()); + + takesBool(templated_container.size()); + // We don't detect this one because we don't know the parameter of takesBool + // until the type of templated_container.size() is known + + return templated_container.size(); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: the 'empty' method should be used + // CHECK-MESSAGES: :44:8: note: method 'TemplatedContainer'::empty() defined here + // CHECK-FIXES: {{^ }}return !templated_container.empty(); +} + +template +void instantiatedTemplateWithSizeCall() { + TypeRequiresSize t; + // The instantiation of the template with std::vector should not + // result in changing the template, because we don't know that + // TypeRequiresSize generally has `.empty()` + if (t.size()) + ; + + if (t == TypeRequiresSize{}) + ; + + if (t != TypeRequiresSize{}) + ; +} + +class TypeWithSize { +public: + TypeWithSize(); + bool operator==(const TypeWithSize &other) const; + bool operator!=(const TypeWithSize &other) const; + + unsigned size() const { return 0; } + // Does not have `.empty()` +}; + +void instantiator() { + instantiatedTemplateWithSizeCall(); + instantiatedTemplateWithSizeCall>(); +} From ae8f4b2178c46da1f10eb9279c9b44fab8b85417 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 17 Dec 2020 16:48:04 -0800 Subject: [PATCH 120/378] [AMDGPU] Folding of FI operand with flat scratch Differential Revision: https://reviews.llvm.org/D93501 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 57 ++++++++---- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 7 ++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 4 + .../CodeGen/AMDGPU/flat-scratch-fold-fi.mir | 88 +++++++++++++++++++ .../CodeGen/AMDGPU/frame-index-elimination.ll | 2 +- 6 files changed, 144 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index bfba432848d4c..06cce54e540c0 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -172,9 +172,23 @@ static bool frameIndexMayFold(const SIInstrInfo *TII, const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) { - return OpToFold.isFI() && - TII->isMUBUF(UseMI) && - OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr); + if (!OpToFold.isFI()) + return false; + + if (TII->isMUBUF(UseMI)) + return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), + AMDGPU::OpName::vaddr); + if (!TII->isFLATScratch(UseMI)) + return false; + + int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), + AMDGPU::OpName::saddr); + if (OpNo == SIdx) + return true; + + int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), + AMDGPU::OpName::vaddr); + return OpNo == VIdx && SIdx == -1; } FunctionPass *llvm::createSIFoldOperandsPass() { @@ -631,25 +645,36 @@ void SIFoldOperands::foldOperand( // Sanity check that this is a stack access. // FIXME: Should probably use stack pseudos before frame lowering. - if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() != - MFI->getScratchRSrcReg()) - return; + if (TII->isMUBUF(*UseMI)) { + if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() != + MFI->getScratchRSrcReg()) + return; - // Ensure this is either relative to the current frame or the current wave. 
- MachineOperand &SOff = - *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset); - if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) && - (!SOff.isImm() || SOff.getImm() != 0)) - return; + // Ensure this is either relative to the current frame or the current + // wave. + MachineOperand &SOff = + *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset); + if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) && + (!SOff.isImm() || SOff.getImm() != 0)) + return; + + // If this is relative to the current wave, update it to be relative to + // the current frame. + if (SOff.isImm()) + SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false); + } // A frame index will resolve to a positive constant, so it should always be // safe to fold the addressing mode, even pre-GFX9. UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex()); - // If this is relative to the current wave, update it to be relative to the - // current frame. - if (SOff.isImm()) - SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false); + if (TII->isFLATScratch(*UseMI) && + AMDGPU::getNamedOperandIdx(UseMI->getOpcode(), + AMDGPU::OpName::vaddr) != -1) { + unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode()); + UseMI->setDesc(TII->get(NewOpc)); + } + return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 4625cefa1e3e4..75aedee1ec6bc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1184,6 +1184,9 @@ namespace AMDGPU { LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode); + LLVM_READONLY + int getFlatScratchInstSSfromSV(uint16_t Opcode); + const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 746d08b8ce0e9..e48138e56d71c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2524,6 +2524,13 @@ def getFlatScratchInstSTfromSS : InstrMapping { let ValueCols = [["ST"]]; } +def getFlatScratchInstSSfromSV : InstrMapping { + let FilterClass = "FlatScratchInst"; + let RowFields = ["SVOp"]; + let ColFields = ["Mode"]; + let KeyCol = ["SV"]; + let ValueCols = [["SS"]]; +} include "SIInstructions.td" diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ab203c44e022f..c91a59003319a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1498,6 +1498,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int64_t Offset = FrameInfo.getObjectOffset(Index); if (ST.enableFlatScratch()) { if (TII->isFLATScratch(*MI)) { + assert((int16_t)FIOperandNum == + AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::saddr)); + // The offset is always swizzled, just replace it if (FrameReg) FIOp.ChangeToRegister(FrameReg, false); diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir new file mode 100644 index 0000000000000..37cec99ae0ac9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-enable-flat-scratch -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + 
+--- +name: test_fold_fi_scratch_load_vgpr +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +body: | + bb.0.entry: + ; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + S_ENDPGM 0 + +... + +--- +name: test_fold_fi_scratch_load_sgpr +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +body: | + bb.0.entry: + ; GCN-LABEL: name: test_fold_fi_scratch_load_sgpr + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0 + ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; GCN: S_ENDPGM 0 + %0:sgpr_32 = S_MOV_B32 %stack.0 + %1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + S_ENDPGM 0 + +... + +--- +name: test_fold_fi_scratch_store_vgpr +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +body: | + bb.0.entry: + ; GCN-LABEL: name: test_fold_fi_scratch_store_vgpr + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = IMPLICIT_DEF + SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + S_ENDPGM 0 + +... + +--- +name: test_no_fold_fi_scratch_store_vgpr +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +body: | + bb.0.entry: + ; GCN-LABEL: name: test_no_fold_fi_scratch_store_vgpr + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = IMPLICIT_DEF + SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + S_ENDPGM 0 + +... + +--- +name: test_fold_fi_scratch_store_sgpr +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +body: | + bb.0.entry: + ; GCN-LABEL: name: test_fold_fi_scratch_store_sgpr + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN: S_ENDPGM 0 + %0:sgpr_32 = S_MOV_B32 %stack.0 + %1:vgpr_32 = IMPLICIT_DEF + SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + S_ENDPGM 0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll index 2c18e724278fd..fcd37840002a1 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -166,7 +166,7 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* b ; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]] ; GFX9-MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}} -; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, [[SP]], off offset:4{{$}} +; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4{{$}} ; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]] define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, i32 %arg2) #0 { From 333108e8bef8966520defe8602521d0d4e2ef789 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 15 Dec 2020 13:28:52 -0800 Subject: [PATCH 121/378] Add a llvm.coro.end.async intrinsic The llvm.coro.end.async intrinsic allows to specify a function that is to be called as the last action before returning. This function will be inlined after coroutine splitting. This function can contain a 'musttail' call to allow for guaranteed tail calling as the last action. Differential Revision: https://reviews.llvm.org/D93568 --- llvm/docs/Coroutines.rst | 42 ++++++++ llvm/include/llvm/IR/Intrinsics.td | 2 + llvm/lib/Transforms/Coroutines/CoroEarly.cpp | 7 +- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 19 +++- llvm/lib/Transforms/Coroutines/CoroInstr.h | 40 ++++++- llvm/lib/Transforms/Coroutines/CoroInternal.h | 4 +- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 100 ++++++++++++++---- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 25 ++++- llvm/test/Transforms/Coroutines/coro-async.ll | 67 +++++++++++- 9 files changed, 273 insertions(+), 33 deletions(-) diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst index 77fb77d9a9673..268e9c79ac8fd 100644 --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -1389,6 +1389,48 @@ The following table summarizes the handling of `coro.end`_ intrinsic. | | Landingpad | nothing | nothing | +------------+-------------+-------------------+-------------------------------+ + +'llvm.coro.end.async' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i1 @llvm.coro.end.async(i8* , i1 , ...) + +Overview: +""""""""" + +The '``llvm.coro.end.async``' marks the point where execution of the resume part +of the coroutine should end and control should return to the caller. As part of +its variable tail arguments this instruction allows to specify a function and +the function's arguments that are to be tail called as the last action before +returning. + + +Arguments: +"""""""""" + +The first argument should refer to the coroutine handle of the enclosing +coroutine. A frontend is allowed to supply null as the first parameter, in this +case `coro-early` pass will replace the null with an appropriate coroutine +handle value. + +The second argument should be `true` if this coro.end is in the block that is +part of the unwind sequence leaving the coroutine body due to an exception and +`false` otherwise. + +The third argument if present should specify a function to be called. + +If the third argument is present, the remaining arguments are the arguments to +the function call. + +.. code-block:: llvm + + call i1 (i8*, i1, ...) 
@llvm.coro.end.async( + i8* %hdl, i1 0, + void (i8*, %async.task*, %async.actor*)* @must_tail_call_return, + i8* %ctxt, %async.task* %task, %async.actor* %actor) + unreachable + .. _coro.suspend: .. _suspend points: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index f71dc147416bb..aed498d80b026 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1213,6 +1213,8 @@ def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], ReadOnly>, NoCapture>]>; def int_coro_end : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty], []>; +def int_coro_end_async + : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty, llvm_vararg_ty], []>; def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 07c571a962b0f..e134ff7f45c67 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -164,10 +164,11 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { if (cast(&I)->isFinal()) CB->setCannotDuplicate(); break; + case Intrinsic::coro_end_async: case Intrinsic::coro_end: // Make sure that fallthrough coro.end is not duplicated as CoroSplit // pass expects that there is at most one fallthrough coro.end. - if (cast(&I)->isFallthrough()) + if (cast(&I)->isFallthrough()) CB->setCannotDuplicate(); break; case Intrinsic::coro_noop: @@ -219,8 +220,8 @@ static bool declaresCoroEarlyIntrinsics(const Module &M) { return coro::declaresIntrinsics( M, {"llvm.coro.id", "llvm.coro.id.retcon", "llvm.coro.id.retcon.once", "llvm.coro.destroy", "llvm.coro.done", "llvm.coro.end", - "llvm.coro.noop", "llvm.coro.free", "llvm.coro.promise", - "llvm.coro.resume", "llvm.coro.suspend"}); + "llvm.coro.end.async", "llvm.coro.noop", "llvm.coro.free", + "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.suspend"}); } PreservedAnalyses CoroEarlyPass::run(Function &F, FunctionAnalysisManager &) { diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 48d52ff1d8dfa..c67d69a52fd2a 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2170,9 +2170,26 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { } // Put CoroEnds into their own blocks. - for (CoroEndInst *CE : Shape.CoroEnds) + for (AnyCoroEndInst *CE : Shape.CoroEnds) { splitAround(CE, "CoroEnd"); + // Emit the musttail call function in a new block before the CoroEnd. + // We do this here so that the right suspend crossing info is computed for + // the uses of the musttail call function call. (Arguments to the coro.end + // instructions would be ignored) + if (auto *AsyncEnd = dyn_cast(CE)) { + auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction(); + if (!MustTailCallFn) + continue; + IRBuilder<> Builder(AsyncEnd); + SmallVector Args(AsyncEnd->args()); + auto Arguments = ArrayRef(Args).drop_front(3); + auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn, + Arguments, Builder); + splitAround(Call, "MustTailCall.Before.CoroEnd"); + } + } + // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will // never has its definition separated from the PHI by the suspend point. 
rewritePHIs(F); diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h index 1b83339224739..9fa2fd12f80b3 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -577,8 +577,7 @@ class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst { } }; -/// This represents the llvm.coro.end instruction. -class LLVM_LIBRARY_VISIBILITY CoroEndInst : public IntrinsicInst { +class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst { enum { FrameArg, UnwindArg }; public: @@ -587,6 +586,19 @@ class LLVM_LIBRARY_VISIBILITY CoroEndInst : public IntrinsicInst { return cast(getArgOperand(UnwindArg))->isOneValue(); } + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + auto ID = I->getIntrinsicID(); + return ID == Intrinsic::coro_end || ID == Intrinsic::coro_end_async; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.end instruction. +class LLVM_LIBRARY_VISIBILITY CoroEndInst : public AnyCoroEndInst { +public: // Methods to support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_end; @@ -596,6 +608,30 @@ class LLVM_LIBRARY_VISIBILITY CoroEndInst : public IntrinsicInst { } }; +/// This represents the llvm.coro.end instruction. +class LLVM_LIBRARY_VISIBILITY CoroAsyncEndInst : public AnyCoroEndInst { + enum { FrameArg, UnwindArg, MustTailCallFuncArg }; + +public: + void checkWellFormed() const; + + Function *getMustTailCallFunction() const { + if (getNumArgOperands() < 3) + return nullptr; + + return cast( + getArgOperand(MustTailCallFuncArg)->stripPointerCasts()); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_end_async; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.alloca.alloc instruction. class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst { enum { SizeArg, AlignArg }; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index d08ed0aaa4b08..5d026b1edd419 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -92,7 +92,7 @@ enum class ABI { // values used during CoroSplit pass. struct LLVM_LIBRARY_VISIBILITY Shape { CoroBeginInst *CoroBegin; - SmallVector CoroEnds; + SmallVector CoroEnds; SmallVector CoroSizes; SmallVector CoroSuspends; SmallVector SwiftErrorOps; @@ -270,6 +270,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape { }; void buildCoroutineFrame(Function &F, Shape &Shape); +CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, + ArrayRef Arguments, IRBuilder<> &); } // End namespace coro. } // End namespace llvm diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 7c4a204e953dc..7acd8bda00846 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -173,8 +173,53 @@ static void maybeFreeRetconStorage(IRBuilder<> &Builder, Shape.emitDealloc(Builder, FramePtr, CG); } +/// Replace an llvm.coro.end.async. +/// Will inline the must tail call function call if there is one. 
+/// \returns true if cleanup of the coro.end block is needed, false otherwise. +static bool replaceCoroEndAsync(AnyCoroEndInst *End) { + IRBuilder<> Builder(End); + + auto *EndAsync = dyn_cast(End); + if (!EndAsync) { + Builder.CreateRetVoid(); + return true /*needs cleanup of coro.end block*/; + } + + auto *MustTailCallFunc = EndAsync->getMustTailCallFunction(); + if (!MustTailCallFunc) { + Builder.CreateRetVoid(); + return true /*needs cleanup of coro.end block*/; + } + + // Move the must tail call from the predecessor block into the end block. + auto *CoroEndBlock = End->getParent(); + auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor(); + assert(MustTailCallFuncBlock && "Must have a single predecessor block"); + auto It = MustTailCallFuncBlock->getTerminator()->getIterator(); + auto *MustTailCall = cast(&*std::prev(It)); + CoroEndBlock->getInstList().splice( + End->getIterator(), MustTailCallFuncBlock->getInstList(), MustTailCall); + + // Insert the return instruction. + Builder.SetInsertPoint(End); + Builder.CreateRetVoid(); + InlineFunctionInfo FnInfo; + + // Remove the rest of the block, by splitting it into an unreachable block. + auto *BB = End->getParent(); + BB->splitBasicBlock(End); + BB->getTerminator()->eraseFromParent(); + + auto InlineRes = InlineFunction(*MustTailCall, FnInfo); + assert(InlineRes.isSuccess() && "Expected inlining to succeed"); + (void)InlineRes; + + // We have cleaned up the coro.end block above. + return false; +} + /// Replace a non-unwind call to llvm.coro.end. -static void replaceFallthroughCoroEnd(CoroEndInst *End, +static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, Value *FramePtr, bool InResume, CallGraph *CG) { // Start inserting right before the coro.end. @@ -192,9 +237,12 @@ static void replaceFallthroughCoroEnd(CoroEndInst *End, break; // In async lowering this returns. - case coro::ABI::Async: - Builder.CreateRetVoid(); + case coro::ABI::Async: { + bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End); + if (!CoroEndBlockNeedsCleanup) + return; break; + } // In unique continuation lowering, the continuations always return void. // But we may have implicitly allocated storage. @@ -229,8 +277,9 @@ static void replaceFallthroughCoroEnd(CoroEndInst *End, } /// Replace an unwind call to llvm.coro.end. -static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape, - Value *FramePtr, bool InResume, CallGraph *CG){ +static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, + Value *FramePtr, bool InResume, + CallGraph *CG) { IRBuilder<> Builder(End); switch (Shape.ABI) { @@ -258,7 +307,7 @@ static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape, } } -static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape, +static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, Value *FramePtr, bool InResume, CallGraph *CG) { if (End->isUnwind()) replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); @@ -511,10 +560,10 @@ void CoroCloner::replaceCoroSuspends() { } void CoroCloner::replaceCoroEnds() { - for (CoroEndInst *CE : Shape.CoroEnds) { + for (AnyCoroEndInst *CE : Shape.CoroEnds) { // We use a null call graph because there's no call graph node for // the cloned function yet. We'll just be rebuilding that later. 
- auto NewCE = cast(VMap[CE]); + auto *NewCE = cast(VMap[CE]); replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); } } @@ -1385,6 +1434,23 @@ static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy, } } +CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, + ArrayRef Arguments, + IRBuilder<> &Builder) { + auto *FnTy = + cast(MustTailCallFn->getType()->getPointerElementType()); + // Coerce the arguments, llvm optimizations seem to ignore the types in + // vaarg functions and throws away casts in optimized mode. + SmallVector CallArgs; + coerceArguments(Builder, FnTy, Arguments, CallArgs); + + auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs); + TailCall->setTailCallKind(CallInst::TCK_MustTail); + TailCall->setDebugLoc(Loc); + TailCall->setCallingConv(MustTailCallFn->getCallingConv()); + return TailCall; +} + static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones) { assert(Shape.ABI == coro::ABI::Async); @@ -1443,18 +1509,10 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, // Insert the call to the tail call function and inline it. auto *Fn = Suspend->getMustTailCallFunction(); - auto DbgLoc = Suspend->getDebugLoc(); - SmallVector Args(Suspend->operand_values()); - auto FnArgs = ArrayRef(Args).drop_front(3).drop_back(1); - auto FnTy = cast(Fn->getType()->getPointerElementType()); - // Coerce the arguments, llvm optimizations seem to ignore the types in - // vaarg functions and throws away casts in optimized mode. - SmallVector CallArgs; - coerceArguments(Builder, FnTy, FnArgs, CallArgs); - auto *TailCall = Builder.CreateCall(FnTy, Fn, CallArgs); - TailCall->setDebugLoc(DbgLoc); - TailCall->setTailCall(); - TailCall->setCallingConv(Fn->getCallingConv()); + SmallVector Args(Suspend->args()); + auto FnArgs = ArrayRef(Args).drop_front(3); + auto *TailCall = + coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder); Builder.CreateRetVoid(); InlineFunctionInfo FnInfo; auto InlineRes = InlineFunction(*TailCall, FnInfo); @@ -1683,7 +1741,7 @@ static void updateCallGraphAfterCoroutineSplit( if (!Shape.CoroBegin) return; - for (llvm::CoroEndInst *End : Shape.CoroEnds) { + for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { auto &Context = End->getContext(); End->replaceAllUsesWith(ConstantInt::getFalse(Context)); End->eraseFromParent(); diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 726899f9c04c7..f0095a649b0ca 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -131,6 +131,7 @@ static bool isCoroutineIntrinsicName(StringRef Name) { "llvm.coro.destroy", "llvm.coro.done", "llvm.coro.end", + "llvm.coro.end.async", "llvm.coro.frame", "llvm.coro.free", "llvm.coro.id", @@ -316,11 +317,16 @@ void coro::Shape::buildFrom(Function &F) { CoroBegin = CB; break; } + case Intrinsic::coro_end_async: case Intrinsic::coro_end: - CoroEnds.push_back(cast(II)); - if (CoroEnds.back()->isFallthrough()) { + CoroEnds.push_back(cast(II)); + if (auto *AsyncEnd = dyn_cast(II)) { + AsyncEnd->checkWellFormed(); + } + if (CoroEnds.back()->isFallthrough() && isa(II)) { // Make sure that the fallthrough coro.end is the first element in the // CoroEnds vector. + // Note: I don't think this is neccessary anymore. 
if (CoroEnds.size() > 1) { if (CoroEnds.front()->isFallthrough()) report_fatal_error( @@ -353,7 +359,7 @@ void coro::Shape::buildFrom(Function &F) { } // Replace all coro.ends with unreachable instruction. - for (CoroEndInst *CE : CoroEnds) + for (AnyCoroEndInst *CE : CoroEnds) changeToUnreachable(CE, /*UseLLVMTrap=*/false); return; @@ -713,6 +719,19 @@ void CoroSuspendAsyncInst::checkWellFormed() const { checkAsyncContextProjectFunction(this, getAsyncContextProjectionFunction()); } +void CoroAsyncEndInst::checkWellFormed() const { + auto *MustTailCallFunc = getMustTailCallFunction(); + if (!MustTailCallFunc) + return; + auto *FnTy = + cast(MustTailCallFunc->getType()->getPointerElementType()); + if (FnTy->getNumParams() != (getNumArgOperands() - 3)) + fail(this, + "llvm.coro.end.async must tail call function argument type must " + "match the tail arguments", + MustTailCallFunc); +} + void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCoroEarlyLegacyPass()); } diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll index f0036440c803b..bca65c37e7d31 100644 --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -94,7 +94,7 @@ entry: call void @some_user(i64 %val.2) tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor) - call i1 @llvm.coro.end(i8* %hdl, i1 0) + call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0) unreachable } @@ -311,12 +311,75 @@ entry: %continuation_task_arg = extractvalue {i8*, i8*, i8*} %res, 1 %task.2 = bitcast i8* %continuation_task_arg to %async.task* tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor) - call i1 @llvm.coro.end(i8* %hdl, i1 0) + call i1 (i8*, i1, ...) 
@llvm.coro.end.async(i8* %hdl, i1 0) unreachable } + +@multiple_coro_end_async_fp = constant <{ i32, i32 }> + <{ i32 trunc ( ; Relative pointer to async function + i64 sub ( + i64 ptrtoint (void (i8*, %async.task*, %async.actor*)* @multiple_coro_end_async to i64), + i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @multiple_coro_end_async_fp, i32 0, i32 1) to i64) + ) + to i32), + i32 128 ; Initial async context size without space for frame +}> + +define swiftcc void @must_tail_call_return(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { + musttail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) + ret void +} + +define swiftcc void @multiple_coro_end_async(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { +entry: + %id = call token @llvm.coro.id.async(i32 128, i32 16, i32 0, + i8* bitcast (<{i32, i32}>* @dont_crash_on_cf_fp to i8*)) + %hdl = call i8* @llvm.coro.begin(token %id, i8* null) + %arg0 = bitcast %async.task* %task to i8* + %arg1 = bitcast <{ i32, i32}>* @my_other_async_function_fp to i8* + %callee_context = call i8* @llvm.coro.async.context.alloc(i8* %arg0, i8* %arg1) + %callee_context.0 = bitcast i8* %callee_context to %async.ctxt* + %callee_context.return_to_caller.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 1 + %return_to_caller.addr = bitcast void(i8*, %async.task*, %async.actor*)** %callee_context.return_to_caller.addr to i8** + %resume.func_ptr = call i8* @llvm.coro.async.resume() + store i8* %resume.func_ptr, i8** %return_to_caller.addr + %callee_context.caller_context.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 0 + store i8* %async.ctxt, i8** %callee_context.caller_context.addr + %resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8* + %callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8* + %res = call {i8*, i8*, i8*} (i8*, i8*, ...) @llvm.coro.suspend.async( + i8* %resume.func_ptr, + i8* %resume_proj_fun, + void (i8*, i8*, %async.task*, %async.actor*)* @dont_crash_on_cf_dispatch, + i8* %callee, i8* %callee_context, %async.task* %task, %async.actor *%actor) + + call void @llvm.coro.async.context.dealloc(i8* %callee_context) + %continuation_task_arg = extractvalue {i8*, i8*, i8*} %res, 1 + %task.2 = bitcast i8* %continuation_task_arg to %async.task* + %eq = icmp eq i8 * %continuation_task_arg, null + br i1 %eq, label %is_equal, label %is_not_equal + +is_equal: + tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor) + call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0) + unreachable + +is_not_equal: + call i1 (i8*, i1, ...) @llvm.coro.end.async( + i8* %hdl, i1 0, + void (i8*, %async.task*, %async.actor*)* @must_tail_call_return, + i8* %async.ctxt, %async.task* %task.2, %async.actor* null) + unreachable +} + +; CHECK-LABEL: define internal swiftcc void @multiple_coro_end_async.resume.0( +; CHECK: musttail call swiftcc void @asyncReturn( +; CHECK: ret void + declare i8* @llvm.coro.prepare.async(i8*) declare token @llvm.coro.id.async(i32, i32, i32, i8*) declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end.async(i8*, i1, ...) declare i1 @llvm.coro.end(i8*, i1) declare {i8*, i8*, i8*} @llvm.coro.suspend.async(i8*, i8*, ...) 
declare i8* @llvm.coro.async.context.alloc(i8*, i8*) From 9cb748724ef5d7d0f7d662d849423404267d532a Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 22 Dec 2020 12:49:11 -0600 Subject: [PATCH 122/378] [OpenMP][Docs] Add FAQ entry about math and complex on GPUs Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D93718 --- openmp/docs/SupportAndFAQ.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index 37c5bcecfcccd..489b66d5b4a60 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -80,3 +80,32 @@ For now, the answer is most likely *no*. Please see :ref:`build_offload_capable_ Q: Does OpenMP offloading support work in packages distributed as part of my OS? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ For now, the answer is most likely *no*. Please see :ref:`build_offload_capable_compiler`. + + +.. _math_and_complex_in_target_regions: + +Q: Does Clang support `` and `` operations in OpenMP target on GPUs? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Yes, LLVM/Clang allows math functions and complex arithmetic inside of OpenMP target regions +that are compiled for GPUs. + +Clang provides a set of wrapper headers that are found first when `math.h` and +`complex.h`, for C, `cmath` and `complex`, foc C++, or similar headers are +included by the application. These wrappers will eventually include the system +version of the corresponding header file after setting up a target device +specific environment. The fact that the system header is included is important +because they differ based on the architecture and operating system and may +contain preprocessor, variable, and function definitions that need to be +available in the target region regardless of the targeted device architecture. +However, various functions may require specialized device versions, e.g., +`sin`, and others are only available on certain devices, e.g., `__umul64hi`. To +provide "native" support for math and complex on the respective architecture, +Clang will wrap the "native" math functions, e.g., as provided by the device +vendor, in an OpenMP begin/end declare variant. These functions will then be +picked up instead of the host versions while host only variables and function +definitions are still available. Complex arithmetic and functions are support +through a similar mechanism. It is worth noting that this support requires +`extensions to the OpenMP begin/end declare variant context selector +`__ +that are exposed through LLVM/Clang to the user as well. From 1eb082c2ea426f1dab4d1b3541b37c883b3a6b4f Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 22 Dec 2020 14:05:46 -0500 Subject: [PATCH 123/378] [OpenMP][Docs] Fixed a typo in the doc that can mislead users to a CMake error When setting `LLVM_ENABLE_RUNTIMES`, lower case word should be used; otherwise, it can cause a CMake error that specific path is not found. Reviewed By: ye-luo Differential Revision: https://reviews.llvm.org/D93719 --- openmp/docs/SupportAndFAQ.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index 489b66d5b4a60..647fa57a9e760 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -53,7 +53,7 @@ Q: How to build an OpenMP offload capable compiler? 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To build an *effective* OpenMP offload capable compiler, only one extra CMake -option, `LLVM_ENABLE_RUNTIMES="OPENMP"`, is needed when building LLVM (Generic +option, `LLVM_ENABLE_RUNTIMES="openmp"`, is needed when building LLVM (Generic information about building LLVM is available `here `__.). Make sure all backends that are targeted by OpenMP to be enabled. By default, Clang will be build with all backends enabled. From 7b0f9dd79a3c1799866a7b08e172598df90c649f Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 22 Dec 2020 13:05:45 -0600 Subject: [PATCH 124/378] [OpenMP][Docs] Fix Typo --- openmp/docs/SupportAndFAQ.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index 647fa57a9e760..ef209aed17a0e 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -91,7 +91,7 @@ Yes, LLVM/Clang allows math functions and complex arithmetic inside of OpenMP ta that are compiled for GPUs. Clang provides a set of wrapper headers that are found first when `math.h` and -`complex.h`, for C, `cmath` and `complex`, foc C++, or similar headers are +`complex.h`, for C, `cmath` and `complex`, for C++, or similar headers are included by the application. These wrappers will eventually include the system version of the corresponding header file after setting up a target device specific environment. The fact that the system header is included is important From 53deef9e0b8ffa1a4a9c0f88b5fc0621978f9ea6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 22 Dec 2020 11:40:51 -0800 Subject: [PATCH 125/378] [RISCV] Remove unneeded !eq comparing a single bit value to 0/1 in RISCVInstrInfoVPseudos.td. NFC Instead we can either use the bit directly. If it was checking for 0 we need to swap the operands or use !not. 
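As an illustrative sketch of the rewrite, here is a minimal TableGen example. The class and def names below (Suffixed, FloatOp, IntOp) are made up for this illustration and do not appear in RISCVInstrInfoVPseudos.td; only the use of a bit-typed template argument with !if, !eq, and !not mirrors the actual change.

  // Hypothetical example of the simplification; not code from the RISC-V backend.
  class Suffixed<bit IsFloat> {
    // Before: !if(!eq(IsFloat, 0), "_VX", "_VF") compared the single bit to 0.
    // After: use the bit directly and swap the operands
    // (equivalently, !if(!not(IsFloat), "_VX", "_VF")).
    string Suffix = !if(IsFloat, "_VF", "_VX");
  }

  def FloatOp : Suffixed<1>;  // Suffix = "_VF"
  def IntOp   : Suffixed<0>;  // Suffix = "_VX"

The same pattern applies when the bit was compared against 1: the !eq is dropped and the operand order is kept as-is.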
--- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 67bdfa80e8c4f..c28bd30936438 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -543,7 +543,7 @@ class VPseudoBinaryCarryIn : Pseudo<(outs RetClass:$rd), - !if(!eq(CarryIn, 1), + !if(CarryIn, (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, GPR:$vl, ixlenimm:$sew), (ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew)), []>, @@ -554,8 +554,8 @@ class VPseudoBinaryCarryIn(PseudoToVInst.VInst); let VLMul = MInfo.value; @@ -675,8 +675,8 @@ multiclass VPseudoBinaryV_VV { multiclass VPseudoBinaryV_VX { foreach m = MxList.m in - defm !if(!eq(IsFloat, 0), "_VX", "_VF") : VPseudoBinary; + defm !if(IsFloat, "_VF", "_VX") : VPseudoBinary; } multiclass VPseudoBinaryV_VI { @@ -699,8 +699,8 @@ multiclass VPseudoBinaryW_VV { multiclass VPseudoBinaryW_VX { foreach m = MxList.m[0-5] in - defm !if(!eq(IsFloat, 0), "_VX", "_VF") : VPseudoBinary; } @@ -712,8 +712,8 @@ multiclass VPseudoBinaryW_WV { multiclass VPseudoBinaryW_WX { foreach m = MxList.m[0-5] in - defm !if(!eq(IsFloat, 0), "_WX", "_WF") : VPseudoBinary; } @@ -741,9 +741,9 @@ multiclass VPseudoBinaryV_WI { multiclass VPseudoBinaryV_VM { foreach m = MxList.m in - def "_VV" # !if(!eq(CarryIn, 1), "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), m.vrclass, m.vrclass, m, CarryIn, Constraint>; } @@ -751,9 +751,9 @@ multiclass VPseudoBinaryV_VM { foreach m = MxList.m in - def "_VX" # !if(!eq(CarryIn, 1), "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), m.vrclass, GPR, m, CarryIn, Constraint>; } @@ -761,9 +761,9 @@ multiclass VPseudoBinaryV_XM { foreach m = MxList.m in - def "_VI" # !if(!eq(CarryIn, 1), "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), m.vrclass, simm5, m, CarryIn, Constraint>; } @@ -789,8 +789,8 @@ multiclass VPseudoBinaryM_VV { multiclass VPseudoBinaryM_VX { foreach m = MxList.m in - defm !if(!eq(IsFloat, 0), "_VX", "_VF") : - VPseudoBinary; } @@ -1374,7 +1374,7 @@ multiclass VPatBinaryV_VM { foreach vti = AllIntegerVectors in defm : VPatBinaryCarryIn; @@ -1384,7 +1384,7 @@ multiclass VPatBinaryV_XM { foreach vti = AllIntegerVectors in defm : VPatBinaryCarryIn; @@ -1394,7 +1394,7 @@ multiclass VPatBinaryV_IM { foreach vti = AllIntegerVectors in defm : VPatBinaryCarryIn; From 7ec7788ac175f3ccb7083de0e786438ad8610771 Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Tue, 22 Dec 2020 20:25:10 +0000 Subject: [PATCH 126/378] Try to fix build on Windows --- .../clang-tidy/checkers/readability-container-size-empty.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp index e730b1b221309..4fe75f46932d7 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-container-size-empty.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s readability-container-size-empty %t +// RUN: %check_clang_tidy %s readability-container-size-empty %t -- -- -fno-delayed-template-parsing namespace std { template struct vector { From 57ffbe020af6469b7c2fdb599f2f7e5e5d0322f0 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 22 Dec 2020 11:00:57 -0500 
Subject: [PATCH 127/378] glld/mac] Don't add names of unreferenced symbols to string table Before this, a hello world program would contain many many unnecessary entries in its string table. No behavior change, just makes the string table in the output smaller and more like ld64's. Differential Revision: https://reviews.llvm.org/D93711 --- lld/MachO/SyntheticSections.cpp | 22 +++++++++++----------- lld/test/MachO/symtab.s | 6 ++++-- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 8b2ebd36e1ae8..2ed1f2eb34fbe 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -694,6 +694,11 @@ void SymtabSection::emitStabs() { } void SymtabSection::finalizeContents() { + auto addSymbol = [&](std::vector &symbols, Symbol *sym) { + uint32_t strx = stringTableSection.addString(sym->getName()); + symbols.push_back({sym, strx}); + }; + // Local symbols aren't in the SymbolTable, so we walk the list of object // files to gather them. for (InputFile *file : inputFiles) { @@ -702,10 +707,8 @@ void SymtabSection::finalizeContents() { // TODO: when we implement -dead_strip, we should filter out symbols // that belong to dead sections. if (auto *defined = dyn_cast(sym)) { - if (!defined->isExternal()) { - uint32_t strx = stringTableSection.addString(sym->getName()); - localSymbols.push_back({sym, strx}); - } + if (!defined->isExternal()) + addSymbol(localSymbols, sym); } } } @@ -713,19 +716,16 @@ void SymtabSection::finalizeContents() { // __dyld_private is a local symbol too. It's linker-created and doesn't // exist in any object file. - if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) { - uint32_t strx = stringTableSection.addString(dyldPrivate->getName()); - localSymbols.push_back({dyldPrivate, strx}); - } + if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) + addSymbol(localSymbols, dyldPrivate); for (Symbol *sym : symtab->getSymbols()) { - uint32_t strx = stringTableSection.addString(sym->getName()); if (auto *defined = dyn_cast(sym)) { assert(defined->isExternal()); - externalSymbols.push_back({sym, strx}); + addSymbol(externalSymbols, sym); } else if (auto *dysym = dyn_cast(sym)) { if (dysym->isReferenced()) - undefinedSymbols.push_back({sym, strx}); + addSymbol(undefinedSymbols, sym); } } diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index d18986c9d91c0..fa784a34e16a0 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -86,17 +86,19 @@ # CHECK-NEXT: iundefsym: 5 # CHECK-NEXT: nundefsym: 2 -## Verify that the first entry in the StringTable is a space. +## Verify that the first entry in the StringTable is a space, and that +## unreferenced symbols aren't emitted. # RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML # YAML: StringTable: # YAML-NEXT: ' ' +# YAML-NOT: _unreferenced #--- libfoo.s .globl _dynamic _dynamic: #--- test.s -.globl _main, _external, _external_weak +.globl _main, _external, _external_weak, _unreferenced .data _external: From 0d15d4b6f43a3355c1d618766c8e550cfe1481d0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 22 Dec 2020 14:13:39 -0500 Subject: [PATCH 128/378] [SLP] use operand index abstraction for number of operands I think this is NFC currently, but the bug would be exposed when we allow binary intrinsics (maxnum, etc) as candidates for reductions. 
The code in matchAssociativeReduction() is using OperationData::getNumberOfOperands() when comparing whether the "EdgeToVisit" iterator is in-bounds, so this code must use the same (potentially offset) operand value to set the "EdgeToVisit". --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b03fb203c6d75..baa8ce2638a0d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6772,7 +6772,8 @@ class HorizontalReduction { // in this case. // Do not perform analysis of remaining operands of ParentStackElem.first // instruction, this whole instruction is an extra argument. - ParentStackElem.second = ParentStackElem.first->getNumOperands(); + OperationData OpData = getOperationData(ParentStackElem.first); + ParentStackElem.second = OpData.getNumberOfOperands(); } else { // We ran into something like: // ParentStackElem.first += ... + ExtraArg + ... From f6929c01952b3f144df620544ed937e801b9c945 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 22 Dec 2020 14:54:09 -0500 Subject: [PATCH 129/378] [SLP] add reduction tests for maxnum/minnum intrinsics; NFC --- .../Transforms/SLPVectorizer/X86/fmaxnum.ll | 147 ++++++++++++++++++ .../Transforms/SLPVectorizer/X86/fminnum.ll | 147 ++++++++++++++++++ 2 files changed, 294 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll index e03f3f808a4ff..23f2196b2425b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll @@ -338,4 +338,151 @@ define void @fmaxnum_16f32() #0 { ret void } +define float @reduction_v4f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v4f32_nnan(float* %p) { +; CHECK-LABEL: @reduction_v4f32_nnan( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr 
inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call nnan float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call nnan float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call nnan float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v8f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v8f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, float* [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, float* [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, float* [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, float* [[G7]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: [[M4:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T4]], float [[M3]]) +; CHECK-NEXT: [[M5:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M4]], float [[T6]]) +; CHECK-NEXT: [[M6:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M5]], float [[T5]]) +; CHECK-NEXT: [[M7:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M6]], float [[T7]]) +; CHECK-NEXT: ret float [[M7]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %g4 = getelementptr inbounds float, float* %p, i64 4 + %g5 = getelementptr inbounds float, float* %p, i64 5 + %g6 = getelementptr inbounds float, float* %p, i64 6 + %g7 = getelementptr inbounds float, float* %p, i64 7 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %t4 = load float, 
float* %g4, align 4 + %t5 = load float, float* %g5, align 4 + %t6 = load float, float* %g6, align 4 + %t7 = load float, float* %g7, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.maxnum.f32(float %m4, float %t6) + %m6 = tail call fast float @llvm.maxnum.f32(float %m5, float %t5) + %m7 = tail call fast float @llvm.maxnum.f32(float %m6, float %t7) + ret float %m7 +} + +define double @reduction_v2f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v2f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: ret double [[M1]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + ret double %m1 +} + +define double @reduction_v4f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v4f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, double* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, double* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T2]], double [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T3]], double [[M2]]) +; CHECK-NEXT: ret double [[M3]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %g2 = getelementptr inbounds double, double* %p, i64 2 + %g3 = getelementptr inbounds double, double* %p, i64 3 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %t2 = load double, double* %g2, align 4 + %t3 = load double, double* %g3, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2) + ret double %m3 +} + attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll index b830d826c2e96..81bcfb2f1e9b7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll @@ -338,4 +338,151 @@ define void @fminnum_16f32() #0 { ret void } +define float @reduction_v4f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; 
CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v4f32_nnan(float* %p) { +; CHECK-LABEL: @reduction_v4f32_nnan( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call nnan float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call nnan float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call nnan float @llvm.minnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v8f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v8f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, float* [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, float* [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, 
float* [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, float* [[G7]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: [[M4:%.*]] = tail call fast float @llvm.minnum.f32(float [[T4]], float [[M3]]) +; CHECK-NEXT: [[M5:%.*]] = tail call fast float @llvm.minnum.f32(float [[M4]], float [[T6]]) +; CHECK-NEXT: [[M6:%.*]] = tail call fast float @llvm.minnum.f32(float [[M5]], float [[T5]]) +; CHECK-NEXT: [[M7:%.*]] = tail call fast float @llvm.minnum.f32(float [[M6]], float [[T7]]) +; CHECK-NEXT: ret float [[M7]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %g4 = getelementptr inbounds float, float* %p, i64 4 + %g5 = getelementptr inbounds float, float* %p, i64 5 + %g6 = getelementptr inbounds float, float* %p, i64 6 + %g7 = getelementptr inbounds float, float* %p, i64 7 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %t4 = load float, float* %g4, align 4 + %t5 = load float, float* %g5, align 4 + %t6 = load float, float* %g6, align 4 + %t7 = load float, float* %g7, align 4 + %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.minnum.f32(float %m4, float %t6) + %m6 = tail call fast float @llvm.minnum.f32(float %m5, float %t5) + %m7 = tail call fast float @llvm.minnum.f32(float %m6, float %t7) + ret float %m7 +} + +define double @reduction_v2f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v2f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.minnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: ret double [[M1]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0) + ret double %m1 +} + +define double @reduction_v4f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v4f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, double* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, double* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.minnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast double @llvm.minnum.f64(double [[T2]], double [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast double @llvm.minnum.f64(double [[T3]], double [[M2]]) +; CHECK-NEXT: ret 
double [[M3]]
+;
+  %g1 = getelementptr inbounds double, double* %p, i64 1
+  %g2 = getelementptr inbounds double, double* %p, i64 2
+  %g3 = getelementptr inbounds double, double* %p, i64 3
+  %t0 = load double, double* %p, align 4
+  %t1 = load double, double* %g1, align 4
+  %t2 = load double, double* %g2, align 4
+  %t3 = load double, double* %g3, align 4
+  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
+  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
+  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
+  ret double %m3
+}
+
 attributes #0 = { nounwind }

From 3dbe471a260392ec63dda8deb2709160afc56dde Mon Sep 17 00:00:00 2001
From: Sam McCall
Date: Tue, 22 Dec 2020 21:36:41 +0100
Subject: [PATCH 130/378] [clangd] Use atomics instead of locks to track periodic memory trimming

Instead of always locking/unlocking a contended mutex, we now do one
atomic read in the common case, and one read + one exchange if the timer
has expired.

Also use this for memory profiling which has similar/compatible
requirements.

Differential Revision: https://reviews.llvm.org/D93726
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp | 37 ++++---------------
 clang-tools-extra/clangd/ClangdLSPServer.h | 11 ++----
 .../clangd/support/Threading.cpp | 12 ++++++
 clang-tools-extra/clangd/support/Threading.h | 29 +++++++++++++++
 .../unittests/support/ThreadingTests.cpp | 21 +++++++++++
 5 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 0c42f95fb5947..c606ccae4fdc0 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -1285,13 +1285,7 @@ void ClangdLSPServer::publishDiagnostics(
 }
 
 void ClangdLSPServer::maybeExportMemoryProfile() {
-  if (!trace::enabled())
-    return;
-  // Profiling might be expensive, so we throttle it to happen once every 5
-  // minutes.
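// Standalone sketch of the throttling scheme described in the commit message
// above: one atomic load in the common case, and one compare-exchange once
// the deadline has passed. This is an illustrative aside, not part of the
// patch; the names are placeholders, and the real helper this patch adds is
// PeriodicThrottler in clangd/support/Threading.h (shown further down).
#include <atomic>
#include <chrono>

class ThrottleSketch {
  using Clock = std::chrono::steady_clock;
  using Rep = Clock::duration::rep;
  Rep Period;            // minimum spacing between runs, in clock ticks
  std::atomic<Rep> Next; // earliest tick at which the next run may happen

public:
  explicit ThrottleSketch(Clock::duration Period)
      : Period(Period.count()),
        Next(Clock::now().time_since_epoch().count()) {}

  // Returns true for at most one caller per Period.
  bool operator()() {
    Rep Now = Clock::now().time_since_epoch().count();
    Rep OldNext = Next.load(std::memory_order_acquire); // cheap common case
    if (Now < OldNext)
      return false;
    // Deadline passed: race to advance it. Only the thread that wins the
    // compare-exchange runs the guarded operation; the others see the bumped
    // deadline and back off.
    return Next.compare_exchange_strong(OldNext, Now + Period,
                                        std::memory_order_acq_rel);
  }
};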
- static constexpr auto ProfileInterval = std::chrono::minutes(5); - auto Now = std::chrono::steady_clock::now(); - if (Now < NextProfileTime) + if (!trace::enabled() || !ShouldProfile()) return; static constexpr trace::Metric MemoryUsage( @@ -1300,27 +1294,11 @@ void ClangdLSPServer::maybeExportMemoryProfile() { MemoryTree MT; profile(MT); record(MT, "clangd_lsp_server", MemoryUsage); - NextProfileTime = Now + ProfileInterval; } void ClangdLSPServer::maybeCleanupMemory() { - // Memory cleanup is probably expensive, throttle it - static constexpr auto MemoryCleanupInterval = std::chrono::minutes(1); - - if (!Opts.MemoryCleanup) + if (!Opts.MemoryCleanup || !ShouldCleanupMemory()) return; - - // FIXME: this can probably be done without a mutex - // and the logic could be shared with maybeExportMemoryProfile - { - auto Now = std::chrono::steady_clock::now(); - std::lock_guard Lock(NextMemoryCleanupTimeMutex); - if (Now < NextMemoryCleanupTime) - return; - NextMemoryCleanupTime = Now + MemoryCleanupInterval; - } - - vlog("Calling memory cleanup callback"); Opts.MemoryCleanup(); } @@ -1481,10 +1459,15 @@ void ClangdLSPServer::onAST(const ASTParams &Params, ClangdLSPServer::ClangdLSPServer(class Transport &Transp, const ThreadsafeFS &TFS, const ClangdLSPServer::Options &Opts) - : BackgroundContext(Context::current().clone()), Transp(Transp), + : ShouldProfile(/*Period=*/std::chrono::minutes(5), + /*Delay=*/std::chrono::minutes(1)), + ShouldCleanupMemory(/*Period=*/std::chrono::minutes(1), + /*Delay=*/std::chrono::minutes(1)), + BackgroundContext(Context::current().clone()), Transp(Transp), MsgHandler(new MessageHandler(*this)), TFS(TFS), SupportedSymbolKinds(defaultSymbolKinds()), SupportedCompletionItemKinds(defaultCompletionItemKinds()), Opts(Opts) { + // clang-format off MsgHandler->bind("initialize", &ClangdLSPServer::onInitialize); MsgHandler->bind("initialized", &ClangdLSPServer::onInitialized); @@ -1529,10 +1512,6 @@ ClangdLSPServer::ClangdLSPServer(class Transport &Transp, if (Opts.FoldingRanges) MsgHandler->bind("textDocument/foldingRange", &ClangdLSPServer::onFoldingRange); // clang-format on - - // Delay first profile and memory cleanup until we've finished warming up. - NextMemoryCleanupTime = NextProfileTime = - std::chrono::steady_clock::now() + std::chrono::minutes(1); } ClangdLSPServer::~ClangdLSPServer() { diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index b5f9d2c9d766a..a41bc5666af33 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -19,6 +19,7 @@ #include "support/Context.h" #include "support/MemoryTree.h" #include "support/Path.h" +#include "support/Threading.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSet.h" @@ -186,18 +187,12 @@ class ClangdLSPServer : private ClangdServer::Callbacks { /// Runs profiling and exports memory usage metrics if tracing is enabled and /// profiling hasn't happened recently. void maybeExportMemoryProfile(); + PeriodicThrottler ShouldProfile; /// Run the MemoryCleanup callback if it's time. /// This method is thread safe. void maybeCleanupMemory(); - - /// Timepoint until which profiling is off. It is used to throttle profiling - /// requests. - std::chrono::steady_clock::time_point NextProfileTime; - - /// Next time we want to call the MemoryCleanup callback. 
- std::mutex NextMemoryCleanupTimeMutex; - std::chrono::steady_clock::time_point NextMemoryCleanupTime; + PeriodicThrottler ShouldCleanupMemory; /// Since initialization of CDBs and ClangdServer is done lazily, the /// following context captures the one used while creating ClangdLSPServer and diff --git a/clang-tools-extra/clangd/support/Threading.cpp b/clang-tools-extra/clangd/support/Threading.cpp index 5f95888ae3e2d..7f3bd62be306c 100644 --- a/clang-tools-extra/clangd/support/Threading.cpp +++ b/clang-tools-extra/clangd/support/Threading.cpp @@ -116,5 +116,17 @@ void wait(std::unique_lock &Lock, std::condition_variable &CV, CV.wait_until(Lock, D.time()); } +bool PeriodicThrottler::operator()() { + Rep Now = Stopwatch::now().time_since_epoch().count(); + Rep OldNext = Next.load(std::memory_order_acquire); + if (Now < OldNext) + return false; + // We're ready to run (but may be racing other threads). + // Work out the updated target time, and run if we successfully bump it. + Rep NewNext = Now + Period; + return Next.compare_exchange_strong(OldNext, NewNext, + std::memory_order_acq_rel); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/support/Threading.h b/clang-tools-extra/clangd/support/Threading.h index 5155ac193fd18..da9e3b8ea8b68 100644 --- a/clang-tools-extra/clangd/support/Threading.h +++ b/clang-tools-extra/clangd/support/Threading.h @@ -169,6 +169,35 @@ template class Memoize { } }; +/// Used to guard an operation that should run at most every N seconds. +/// +/// Usage: +/// mutable PeriodicThrottler ShouldLog(std::chrono::seconds(1)); +/// void calledFrequently() { +/// if (ShouldLog()) +/// log("this is not spammy"); +/// } +/// +/// This class is threadsafe. If multiple threads are involved, then the guarded +/// operation still needs to be threadsafe! +class PeriodicThrottler { + using Stopwatch = std::chrono::steady_clock; + using Rep = Stopwatch::duration::rep; + + Rep Period; + std::atomic Next; + +public: + /// If Period is zero, the throttler will return true every time. + PeriodicThrottler(Stopwatch::duration Period, Stopwatch::duration Delay = {}) + : Period(Period.count()), + Next((Stopwatch::now() + Delay).time_since_epoch().count()) {} + + /// Returns whether the operation should run at this time. + /// operator() is safe to call concurrently. + bool operator()(); +}; + } // namespace clangd } // namespace clang #endif diff --git a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp index e265ad2eabeae..87002d3cfa86a 100644 --- a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/DenseMap.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include #include namespace clang { @@ -121,5 +122,25 @@ TEST_F(ThreadingTest, MemoizeDeterministic) { ASSERT_THAT(ValueA.load(), testing::AnyOf('A', 'B')); } +// It's hard to write a real test of this class, std::chrono is awkward to mock. +// But test some degenerate cases at least. 
+TEST(PeriodicThrottlerTest, Minimal) { + PeriodicThrottler Once(std::chrono::hours(24)); + EXPECT_TRUE(Once()); + EXPECT_FALSE(Once()); + EXPECT_FALSE(Once()); + + PeriodicThrottler Later(std::chrono::hours(24), + /*Delay=*/std::chrono::hours(24)); + EXPECT_FALSE(Later()); + EXPECT_FALSE(Later()); + EXPECT_FALSE(Later()); + + PeriodicThrottler Always(std::chrono::seconds(0)); + EXPECT_TRUE(Always()); + EXPECT_TRUE(Always()); + EXPECT_TRUE(Always()); +} + } // namespace clangd } // namespace clang From df6cbd37f57fd330e413c394a4653ea55393fcef Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 22 Dec 2020 17:42:59 +0100 Subject: [PATCH 131/378] [mlir] Lower gpu.memcpy to GPU runtime calls. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D93204 --- .../ConvertLaunchFuncToRuntimeCalls.cpp | 65 +++++++++++++++++++ .../lower-memcpy-to-gpu-runtime-calls.mlir | 19 ++++++ .../cuda-runtime-wrappers.cpp | 7 ++ .../rocm-runtime-wrappers.cpp | 5 ++ 4 files changed, 96 insertions(+) create mode 100644 mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 3b4b39e57d557..41a079c44eea5 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -151,6 +151,12 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { "mgpuMemFree", llvmVoidType, {llvmPointerType /* void *ptr */, llvmPointerType /* void *stream */}}; + FunctionCallBuilder memcpyCallBuilder = { + "mgpuMemcpy", + llvmVoidType, + {llvmPointerType /* void *dst */, llvmPointerType /* void *src */, + llvmIntPtrType /* intptr_t sizeBytes */, + llvmPointerType /* void *stream */}}; }; /// A rewrite pattern to convert gpu.host_register operations into a GPU runtime @@ -268,6 +274,20 @@ class EraseGpuModuleOpPattern : public OpRewritePattern { return success(); } }; + +/// A rewrite pattern to convert gpu.memcpy operations into a GPU runtime +/// call. Currently it supports CUDA and ROCm (HIP). +class ConvertMemcpyOpToGpuRuntimeCallPattern + : public ConvertOpToGpuRuntimeCallPattern { +public: + ConvertMemcpyOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter) + : ConvertOpToGpuRuntimeCallPattern(typeConverter) {} + +private: + LogicalResult + matchAndRewrite(gpu::MemcpyOp memcpyOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; } // namespace void GpuToLLVMConversionPass::runOnOperation() { @@ -643,6 +663,50 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( return success(); } +LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite( + gpu::MemcpyOp memcpyOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + auto memRefType = memcpyOp.src().getType().cast(); + + if (failed(areAllLLVMTypes(memcpyOp, operands, rewriter)) || + !isSupportedMemRefType(memRefType) || + failed(isAsyncWithOneDependency(rewriter, memcpyOp))) + return failure(); + + auto loc = memcpyOp.getLoc(); + auto adaptor = gpu::MemcpyOpAdaptor(operands, memcpyOp->getAttrDictionary()); + + MemRefDescriptor srcDesc(adaptor.src()); + + Value numElements = + memRefType.hasStaticShape() + ? createIndexConstant(rewriter, loc, memRefType.getNumElements()) + // For identity layouts (verified above), the number of elements is + // stride[0] * size[0]. 
+ : rewriter.create(loc, srcDesc.stride(rewriter, loc, 0), + srcDesc.size(rewriter, loc, 0)); + + Type elementPtrType = getElementPtrType(memRefType); + Value nullPtr = rewriter.create(loc, elementPtrType); + Value gepPtr = rewriter.create( + loc, elementPtrType, ArrayRef{nullPtr, numElements}); + auto sizeBytes = + rewriter.create(loc, getIndexType(), gepPtr); + + auto src = rewriter.create( + loc, llvmPointerType, srcDesc.alignedPtr(rewriter, loc)); + auto dst = rewriter.create( + loc, llvmPointerType, + MemRefDescriptor(adaptor.dst()).alignedPtr(rewriter, loc)); + + auto stream = adaptor.asyncDependencies().front(); + memcpyCallBuilder.create(loc, rewriter, {dst, src, sizeBytes, stream}); + + rewriter.replaceOp(memcpyOp, {stream}); + + return success(); +} + std::unique_ptr> mlir::createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation) { return std::make_unique(gpuBinaryAnnotation); @@ -658,6 +722,7 @@ void mlir::populateGpuToLLVMConversionPatterns( patterns.insert(converter); patterns.insert( diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir new file mode 100644 index 0000000000000..790c92f92ec96 --- /dev/null +++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir @@ -0,0 +1,19 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s --gpu-to-llvm | FileCheck %s + +module attributes {gpu.container_module} { + + // CHECK: func @foo + func @foo(%dst : memref<7xf32, 1>, %src : memref<7xf32>) { + // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate + %t0 = gpu.wait async + // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint + // CHECK: %[[src:.*]] = llvm.bitcast + // CHECK: %[[dst:.*]] = llvm.bitcast + // CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]]) + %t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32> + // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]]) + // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]]) + gpu.wait [%t1] + return + } +} diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp index a6729b1c0b7d1..72d172889d301 100644 --- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp +++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp @@ -117,6 +117,13 @@ extern "C" void mgpuMemFree(void *ptr, CUstream /*stream*/) { CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast(ptr))); } +extern "C" void mgpuMemcpy(void *dst, void *src, uint64_t sizeBytes, + CUstream stream) { + CUDA_REPORT_IF_ERROR(cuMemcpyAsync(reinterpret_cast(dst), + reinterpret_cast(src), + sizeBytes, stream)); +} + /// Helper functions for writing mlir example code // Allows to register byte array with the CUDA runtime. Helpful until we have diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp index aad7ae27ff892..4f62f204f4a83 100644 --- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp +++ b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp @@ -118,6 +118,11 @@ extern "C" void mgpuMemFree(void *ptr, hipStream_t /*stream*/) { HIP_REPORT_IF_ERROR(hipMemFree(ptr)); } +extern "C" void mgpuMemcpy(void *dst, void *src, uint64_t sizeBytes, + hipStream_t stream) { + HIP_REPORT_IF_ERROR(hipMemcpyAsync(dst, src, sizeBytes, stream)); +} + /// Helper functions for writing mlir example code // Allows to register byte array with the ROCM runtime. 
Helpful until we have From f7a26127f21fb1ca8252879ca647835ea7c5903d Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 22 Dec 2020 22:58:39 +0100 Subject: [PATCH 132/378] [clangd] Release notes for b8c37153d5393aad96 --- clang-tools-extra/docs/ReleaseNotes.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 450b80fd45814..2960aad5a5569 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -47,7 +47,17 @@ Major New Features Improvements to clangd ---------------------- -The improvements are... +- clangd's memory usage is significantly reduced on most Linux systems. + In particular, memory usage should not increase dramatically over time. + + The standard allocator on most systems is glibc's ptmalloc2, and it creates + disproportionately large heaps when handling clangd's allocation patterns. + By default, clangd will now periodically call ``malloc_trim`` to release free + pages on glibc systems. + + Users of other allocators (such as ``jemalloc`` or ``tcmalloc``) on glibc + systems can disable this using ``--malloc_trim=0`` or the CMake flag + ``-DCLANGD_MALLOC_TRIM=0``. Improvements to clang-doc ------------------------- From a781a706b961a348006b604cdff8b555e62a2fcb Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 22 Dec 2020 14:29:06 -0800 Subject: [PATCH 133/378] [WebAssembly][SIMD] Rename shuffle, swizzle, and load_splats These instructions previously used prefixes like v8x16 to signify that they were agnostic between float and int interpretations. We renamed these instructions to remove this form of prefix in https://github.com/WebAssembly/simd/issues/297 and https://github.com/WebAssembly/simd/issues/316 and this commit brings the names in LLVM up to date. 
Differential Revision: https://reviews.llvm.org/D93722 --- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 8 +- .../WebAssembly/WebAssemblyInstrSIMD.td | 46 ++++----- .../CodeGen/WebAssembly/simd-build-vector.ll | 6 +- .../CodeGen/WebAssembly/simd-intrinsics.ll | 10 +- .../CodeGen/WebAssembly/simd-load-splat.ll | 2 +- .../WebAssembly/simd-load-store-alignment.ll | 36 +++---- .../WebAssembly/simd-nested-shuffles.ll | 2 +- llvm/test/CodeGen/WebAssembly/simd-offset.ll | 96 +++++++++---------- .../WebAssembly/simd-shift-complex-splats.ll | 2 +- .../WebAssembly/simd-shuffle-bitcast.ll | 2 +- llvm/test/CodeGen/WebAssembly/simd.ll | 48 +++++----- .../test/MC/Disassembler/WebAssembly/wasm.txt | 2 +- llvm/test/MC/WebAssembly/simd-encodings.s | 24 ++--- 13 files changed, 142 insertions(+), 142 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index fccee4b96ed51..4bc77aa68668b 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -194,7 +194,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v8x16) + WASM_LOAD_STORE(LOAD8_SPLAT) WASM_LOAD_STORE(LOAD_LANE_v16i8) WASM_LOAD_STORE(STORE_LANE_v16i8) return 0; @@ -222,7 +222,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v16x8) + WASM_LOAD_STORE(LOAD16_SPLAT) WASM_LOAD_STORE(LOAD_LANE_v8i16) WASM_LOAD_STORE(STORE_LANE_v8i16) return 1; @@ -253,7 +253,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64) WASM_LOAD_STORE(MEMORY_ATOMIC_NOTIFY) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT32) - WASM_LOAD_STORE(LOAD_SPLAT_v32x4) + WASM_LOAD_STORE(LOAD32_SPLAT) WASM_LOAD_STORE(LOAD_ZERO_v4i32) WASM_LOAD_STORE(LOAD_LANE_v4i32) WASM_LOAD_STORE(STORE_LANE_v4i32) @@ -272,7 +272,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT64) - WASM_LOAD_STORE(LOAD_SPLAT_v64x2) + WASM_LOAD_STORE(LOAD64_SPLAT) WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16) WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16) WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 191cdea0c0aee..e48bbaebd47e9 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -64,55 +64,55 @@ defm : LoadPatOffsetOnly; defm : LoadPatGlobalAddrOffOnly; } -// vNxM.load_splat -multiclass SIMDLoadSplat simdop> { +// v128.loadX_splat +multiclass SIMDLoadSplat simdop> { let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_SPLAT_#vec#_A32 : + defm LOAD#size#_SPLAT_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; - defm LOAD_SPLAT_#vec#_A64 : + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; + defm LOAD#size#_SPLAT_A64 : SIMD_I<(outs 
V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; } } -defm "" : SIMDLoadSplat<"v8x16", 7>; -defm "" : SIMDLoadSplat<"v16x8", 8>; -defm "" : SIMDLoadSplat<"v32x4", 9>; -defm "" : SIMDLoadSplat<"v64x2", 10>; +defm "" : SIMDLoadSplat<8, 7>; +defm "" : SIMDLoadSplat<16, 8>; +defm "" : SIMDLoadSplat<32, 9>; +defm "" : SIMDLoadSplat<64, 10>; def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; -foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], - ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { +foreach args = [["v16i8", "8"], ["v8i16", "16"], ["v4i32", "32"], + ["v2i64", "64"], ["v4f32", "32"], ["v2f64", "64"]] in { defm : LoadPatNoOffset(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatImmOff(args[0]), load_splat, regPlusImm, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatImmOff(args[0]), load_splat, or_is_add, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatOffsetOnly(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatGlobalAddrOffOnly(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; } // Load and extend @@ -401,10 +401,10 @@ defm SHUFFLE : vec_i8imm_op:$mC, vec_i8imm_op:$mD, vec_i8imm_op:$mE, vec_i8imm_op:$mF), [], - "v8x16.shuffle\t$dst, $x, $y, "# + "i8x16.shuffle\t$dst, $x, $y, "# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", - "v8x16.shuffle\t"# + "i8x16.shuffle\t"# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", 13>; @@ -433,14 +433,14 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; } -// Swizzle lanes: v8x16.swizzle +// Swizzle lanes: i8x16.swizzle def wasm_swizzle_t : SDTypeProfile<1, 2, []>; def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; defm SWIZZLE : SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), [(set (v16i8 V128:$dst), (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>; + "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>; def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), (SWIZZLE V128:$src, V128:$mask)>; diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll index 4f75887873784..1360e0172d3fd 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -178,7 +178,7 @@ define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) { ; CHECK-LABEL: swizzle_one_i8x16: ; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK-NEXT: return $pop[[L0]] define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { %m0 = extractelement <16 x i8> %mask, i32 0 @@ -189,7 +189,7 @@ define <16 x i8> 
@swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK-NEXT: return $pop[[L0]] define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) { %m0 = extractelement <16 x i8> %mask, i32 0 @@ -256,7 +256,7 @@ define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) { ; CHECK-LABEL: mashup_swizzle_i8x16: ; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK: i8x16.replace_lane ; CHECK: i8x16.replace_lane ; CHECK: i8x16.replace_lane diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 23a7bfbde927a..da7343770de77 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -13,7 +13,7 @@ target triple = "wasm32-unknown-unknown" ; ============================================================================== ; CHECK-LABEL: swizzle_v16i8: ; SIMD128-NEXT: .functype swizzle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.swizzle $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: i8x16.swizzle $push[[R:[0-9]+]]=, $0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>) define <16 x i8> @swizzle_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -164,9 +164,9 @@ define <16 x i8> @narrow_unsigned_v16i8(<8 x i16> %low, <8 x i16> %high) { } ; CHECK-LABEL: shuffle_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} declare <16 x i8> @llvm.wasm.shuffle( @@ -180,9 +180,9 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { } ; CHECK-LABEL: shuffle_undef_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll index 4e693c285a3fa..3d08a586edb5a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll @@ -9,7 +9,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: load_splat: ; CHECK-NEXT: .functype load_splat (i32, i32) -> (i32) ; CHECK-NEXT: i32.load8_u $[[E:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[V:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[V:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: v128.store 0($1), $pop[[V]]{{$}} ; CHECK-NEXT: return $[[E]]{{$}} define i8 @load_splat(i8* %p, <16 x i8>* %out) { diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll index 8ebeb15ccc9a2..000b7730e3bf2 100644 --- 
a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll @@ -89,11 +89,11 @@ define void @store_v16i8_a32(<16 x i8> *%p, <16 x i8> %v) { ret void } -; 1 is the default alignment for v8x16.load_splat so no attribute is needed. +; 1 is the default alignment for v128.load8_splat so no attribute is needed. ; CHECK-LABEL: load_splat_v16i8_a1: ; CHECK-NEXT: .functype load_splat_v16i8_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_a1(i8* %p) { %e = load i8, i8* %p, align 1 @@ -106,7 +106,7 @@ define <16 x i8> @load_splat_v16i8_a1(i8* %p) { ; CHECK-LABEL: load_splat_v16i8_a2: ; CHECK-NEXT: .functype load_splat_v16i8_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_a2(i8* %p) { %e = load i8, i8* %p, align 2 @@ -304,7 +304,7 @@ define <8 x i16> @load_sext_v8i16_a16(<8 x i8>* %p) { ; CHECK-LABEL: load_splat_v8i16_a1: ; CHECK-NEXT: .functype load_splat_v8i16_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a1(i16* %p) { %e = load i16, i16* %p, align 1 @@ -313,11 +313,11 @@ define <8 x i16> @load_splat_v8i16_a1(i16* %p) { ret <8 x i16> %v2 } -; 2 is the default alignment for v16x8.load_splat so no attribute is needed. +; 2 is the default alignment for v128.load16_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v8i16_a2: ; CHECK-NEXT: .functype load_splat_v8i16_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a2(i16* %p) { %e = load i16, i16* %p, align 2 @@ -330,7 +330,7 @@ define <8 x i16> @load_splat_v8i16_a2(i16* %p) { ; CHECK-LABEL: load_splat_v8i16_a4: ; CHECK-NEXT: .functype load_splat_v8i16_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a4(i16* %p) { %e = load i16, i16* %p, align 4 @@ -528,7 +528,7 @@ define <4 x i32> @load_sext_v4i32_a16(<4 x i16>* %p) { ; CHECK-LABEL: load_splat_v4i32_a1: ; CHECK-NEXT: .functype load_splat_v4i32_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a1(i32* %addr) { %e = load i32, i32* %addr, align 1 @@ -539,7 +539,7 @@ define <4 x i32> @load_splat_v4i32_a1(i32* %addr) { ; CHECK-LABEL: load_splat_v4i32_a2: ; CHECK-NEXT: .functype load_splat_v4i32_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a2(i32* %addr) { %e = load i32, i32* %addr, align 2 @@ -548,11 +548,11 @@ define <4 x i32> @load_splat_v4i32_a2(i32* %addr) { ret <4 x i32> %v2 } -; 4 is the default alignment for v32x4.load_splat so no attribute is needed. +; 4 is the default alignment for v128.load32_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v4i32_a4: ; CHECK-NEXT: .functype load_splat_v4i32_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a4(i32* %addr) { %e = load i32, i32* %addr, align 4 @@ -565,7 +565,7 @@ define <4 x i32> @load_splat_v4i32_a4(i32* %addr) { ; CHECK-LABEL: load_splat_v4i32_a8: ; CHECK-NEXT: .functype load_splat_v4i32_a8 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a8(i32* %addr) { %e = load i32, i32* %addr, align 8 @@ -660,7 +660,7 @@ define void @store_v2i64_a32(<2 x i64> *%p, <2 x i64> %v) { ; CHECK-LABEL: load_splat_v2i64_a1: ; CHECK-NEXT: .functype load_splat_v2i64_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a1(i64* %p) { %e = load i64, i64* %p, align 1 @@ -671,7 +671,7 @@ define <2 x i64> @load_splat_v2i64_a1(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a2: ; CHECK-NEXT: .functype load_splat_v2i64_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a2(i64* %p) { %e = load i64, i64* %p, align 2 @@ -682,7 +682,7 @@ define <2 x i64> @load_splat_v2i64_a2(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a4: ; CHECK-NEXT: .functype load_splat_v2i64_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a4(i64* %p) { %e = load i64, i64* %p, align 4 @@ -691,11 +691,11 @@ define <2 x i64> @load_splat_v2i64_a4(i64* %p) { ret <2 x i64> %v2 } -; 8 is the default alignment for v64x2.load_splat so no attribute is needed. +; 8 is the default alignment for v128.load64_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v2i64_a8: ; CHECK-NEXT: .functype load_splat_v2i64_a8 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a8(i64* %p) { %e = load i64, i64* %p, align 8 @@ -708,7 +708,7 @@ define <2 x i64> @load_splat_v2i64_a8(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a16: ; CHECK-NEXT: .functype load_splat_v2i64_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a16(i64* %p) { %e = load i64, i64* %p, align 16 diff --git a/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll b/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll index 597ab58e879e0..b72086a2d6cb9 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; CHECK: v8x16.shuffle +; CHECK: i8x16.shuffle define <4 x i32> @foo(<4 x i32> %x) { %1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll index b2d32936df130..fb41653a514a0 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll @@ -25,7 +25,7 @@ define <16 x i8> @load_splat_v16i8(i8* %p) { ; CHECK: .functype load_splat_v16i8 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i8, i8* %p %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -52,7 +52,7 @@ define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) { ; CHECK: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 16 +; CHECK-NEXT: v128.load8_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 16 @@ -80,7 +80,7 @@ define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) { ; CHECK: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 1 +; CHECK-NEXT: v128.load8_splat 1 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 1 %e = load i8, i8* %s @@ -110,7 +110,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -1 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 -1 %e = load i8, i8* %s @@ -142,7 +142,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nsw i32 %q, 16 @@ -174,7 +174,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 1 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # 
fallthrough-return %s = getelementptr i8, i8* %p, i32 1 %e = load i8, i8* %s @@ -200,7 +200,7 @@ define <16 x i8> @load_splat_v16i8_from_numeric_address() { ; CHECK: .functype load_splat_v16i8_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v8x16.load_splat 32 +; CHECK-NEXT: v128.load8_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i8* %e = load i8, i8* %s @@ -227,7 +227,7 @@ define <16 x i8> @load_splat_v16i8_from_global_address() { ; CHECK: .functype load_splat_v16i8_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v8x16.load_splat gv_i8 +; CHECK-NEXT: v128.load8_splat gv_i8 ; CHECK-NEXT: # fallthrough-return %e = load i8, i8* @gv_i8 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -366,7 +366,7 @@ define <8 x i16> @load_splat_v8i16(i16* %p) { ; CHECK: .functype load_splat_v8i16 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i16, i16* %p %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -428,7 +428,7 @@ define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) { ; CHECK: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 16 +; CHECK-NEXT: v128.load16_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nuw i32 %q, 16 @@ -500,7 +500,7 @@ define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) { ; CHECK: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 2 +; CHECK-NEXT: v128.load16_splat 2 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 1 %e = load i16, i16* %s @@ -568,7 +568,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -2 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 -1 %e = load i16, i16* %s @@ -644,7 +644,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nsw i32 %q, 16 @@ -726,7 +726,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 2 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i16, i16* %p, i32 1 %e = load i16, i16* %s @@ -796,7 +796,7 @@ define <8 x i16> @load_splat_v8i16_from_numeric_address() { ; CHECK: .functype load_splat_v8i16_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v16x8.load_splat 32 +; CHECK-NEXT: v128.load16_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i16* %e = load i16, i16* %s @@ -861,7 +861,7 @@ define <8 x i16> @load_splat_v8i16_from_global_address() { ; CHECK: .functype load_splat_v8i16_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v16x8.load_splat gv_i16 +; CHECK-NEXT: v128.load16_splat gv_i16 ; 
CHECK-NEXT: # fallthrough-return %e = load i16, i16* @gv_i16 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -1197,7 +1197,7 @@ define <4 x i32> @load_splat_v4i32(i32* %addr) { ; CHECK: .functype load_splat_v4i32 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i32, i32* %addr, align 4 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -1259,7 +1259,7 @@ define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { ; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: v128.load32_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nuw i32 %q, 16 @@ -1331,7 +1331,7 @@ define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { ; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: v128.load32_splat 4 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 1 %e = load i32, i32* %s @@ -1399,7 +1399,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 -1 %e = load i32, i32* %s @@ -1475,7 +1475,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nsw i32 %q, 16 @@ -1557,7 +1557,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i32, i32* %p, i32 1 %e = load i32, i32* %s @@ -1627,7 +1627,7 @@ define <4 x i32> @load_splat_v4i32_from_numeric_address() { ; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: v128.load32_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i32* %e = load i32, i32* %s @@ -1692,7 +1692,7 @@ define <4 x i32> @load_splat_v4i32_from_global_address() { ; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat gv_i32 +; CHECK-NEXT: v128.load32_splat gv_i32 ; CHECK-NEXT: # fallthrough-return %e = load i32, i32* @gv_i32 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -2027,7 +2027,7 @@ define <2 x i64> @load_splat_v2i64(i64* %p) { ; CHECK: .functype load_splat_v2i64 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i64, i64* %p %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -2089,7 +2089,7 @@ define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { ; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; 
CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: v128.load64_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nuw i32 %q, 16 @@ -2161,7 +2161,7 @@ define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { ; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: v128.load64_splat 8 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 1 %e = load i64, i64* %s @@ -2229,7 +2229,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 -1 %e = load i64, i64* %s @@ -2305,7 +2305,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nsw i32 %q, 16 @@ -2387,7 +2387,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i64, i64* %p, i32 1 %e = load i64, i64* %s @@ -2457,7 +2457,7 @@ define <2 x i64> @load_splat_v2i64_from_numeric_address() { ; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: v128.load64_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i64* %e = load i64, i64* %s @@ -2522,7 +2522,7 @@ define <2 x i64> @load_splat_v2i64_from_global_address() { ; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat gv_i64 +; CHECK-NEXT: v128.load64_splat gv_i64 ; CHECK-NEXT: # fallthrough-return %e = load i64, i64* @gv_i64 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -2697,7 +2697,7 @@ define <4 x float> @load_splat_v4f32(float* %p) { ; CHECK: .functype load_splat_v4f32 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load float, float* %p %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -2724,7 +2724,7 @@ define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) { ; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: v128.load32_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nuw i32 %q, 16 @@ -2752,7 +2752,7 @@ define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) { ; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: v128.load32_splat 4 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 1 %e = load float, float* %s @@ -2782,7 +2782,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p ; CHECK-NEXT: 
local.get 0 ; CHECK-NEXT: i32.const -4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 -1 %e = load float, float* %s @@ -2814,7 +2814,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nsw i32 %q, 16 @@ -2846,7 +2846,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr float, float* %p, i32 1 %e = load float, float* %s @@ -2872,7 +2872,7 @@ define <4 x float> @load_splat_v4f32_from_numeric_address() { ; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: v128.load32_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to float* %e = load float, float* %s @@ -2899,7 +2899,7 @@ define <4 x float> @load_splat_v4f32_from_global_address() { ; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat gv_f32 +; CHECK-NEXT: v128.load32_splat gv_f32 ; CHECK-NEXT: # fallthrough-return %e = load float, float* @gv_f32 %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -3038,7 +3038,7 @@ define <2 x double> @load_splat_v2f64(double* %p) { ; CHECK: .functype load_splat_v2f64 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load double, double* %p %v1 = insertelement <2 x double> undef, double %e, i32 0 @@ -3065,7 +3065,7 @@ define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) { ; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: v128.load64_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nuw i32 %q, 16 @@ -3093,7 +3093,7 @@ define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) { ; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: v128.load64_splat 8 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 1 %e = load double, double* %s @@ -3123,7 +3123,7 @@ define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 -1 %e = load double, double* %s @@ -3155,7 +3155,7 @@ define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nsw i32 %q, 16 @@ -3187,7 +3187,7 @@ 
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr double, double* %p, i32 1 %e = load double, double* %s @@ -3213,7 +3213,7 @@ define <2 x double> @load_splat_v2f64_from_numeric_address() { ; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: v128.load64_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to double* %e = load double, double* %s @@ -3240,7 +3240,7 @@ define <2 x double> @load_splat_v2f64_from_global_address() { ; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat gv_f64 +; CHECK-NEXT: v128.load64_splat gv_f64 ; CHECK-NEXT: # fallthrough-return %e = load double, double* @gv_f64 %v1 = insertelement <2 x double> undef, double %e, i32 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll index 2473f0b27b7e8..4582bc62216a7 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll @@ -67,7 +67,7 @@ define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) { ; CHECK-NEXT: i8x16.splat $push1=, $1 ; CHECK-NEXT: i8x16.splat $push0=, $2 ; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0 -; CHECK-NEXT: v8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: i8x16.abs $push101=, $pop3 ; CHECK-NEXT: local.tee $push100=, $3=, $pop101 ; CHECK-NEXT: i8x16.extract_lane_u $push9=, $pop100, 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll index b3e2db98861d5..3e73b47c5a5b8 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll @@ -21,7 +21,7 @@ define <4 x i32> @f32x4_splat(float %x) { ; CHECK-LABEL: not_a_vec: ; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}} ; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3 +; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3 ; CHECK-NEXT: return $pop[[R]] define <4 x i32> @not_a_vec(i128 %x) { %a = bitcast i128 %x to <4 x i32> diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index 25e647f07230a..c8053293ebac0 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -202,9 +202,9 @@ define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { } ; CHECK-LABEL: shuffle_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -215,9 +215,9 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { } ; CHECK-LABEL: shuffle_undef_v16i8: -; 
NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -472,9 +472,9 @@ define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { } ; CHECK-LABEL: shuffle_v8i16: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { @@ -484,9 +484,9 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { } ; CHECK-LABEL: shuffle_undef_v8i16: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) { @@ -634,9 +634,9 @@ define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { } ; CHECK-LABEL: shuffle_v4i32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { @@ -646,9 +646,9 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { } ; CHECK-LABEL: shuffle_undef_v4i32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) { @@ -785,9 +785,9 @@ define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) { } ; CHECK-LABEL: shuffle_v2i64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) { @@ -796,9 +796,9 @@ define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) { } ; CHECK-LABEL: shuffle_undef_v2i64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> 
@shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) { @@ -934,9 +934,9 @@ define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) { } ; CHECK-LABEL: shuffle_v4f32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { @@ -946,9 +946,9 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { } ; CHECK-LABEL: shuffle_undef_v4f32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) { @@ -1085,9 +1085,9 @@ define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) { } ; CHECK-LABEL: shuffle_v2f64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) { @@ -1097,9 +1097,9 @@ define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) { } ; CHECK-LABEL: shuffle_undef_v2f64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) { diff --git a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt index bb50b646ab549..8201213e54b26 100644 --- a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt +++ b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt @@ -36,7 +36,7 @@ # CHECK: v128.const 50462976, 117835012, 185207048, 252579084 0xFD 0x0C 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F -# CHECK: v8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +# CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 0xFD 0x0D 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F # Check LEB128 encoding of SIMD instructions diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 509f4246475f0..91e1f07fe44c4 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -24,17 +24,17 @@ main: # CHECK: i64x2.load32x2_u 32 # encoding: [0xfd,0x06,0x03,0x20] i64x2.load32x2_u 32 - # CHECK: v8x16.load_splat 48 # encoding: [0xfd,0x07,0x00,0x30] - v8x16.load_splat 48 + # CHECK: v128.load8_splat 48 # encoding: [0xfd,0x07,0x00,0x30] + v128.load8_splat 48 - # CHECK: v16x8.load_splat 48 # encoding: [0xfd,0x08,0x01,0x30] - 
v16x8.load_splat 48
+    # CHECK: v128.load16_splat 48 # encoding: [0xfd,0x08,0x01,0x30]
+    v128.load16_splat 48
 
-    # CHECK: v32x4.load_splat 48 # encoding: [0xfd,0x09,0x02,0x30]
-    v32x4.load_splat 48
+    # CHECK: v128.load32_splat 48 # encoding: [0xfd,0x09,0x02,0x30]
+    v128.load32_splat 48
 
-    # CHECK: v64x2.load_splat 48 # encoding: [0xfd,0x0a,0x03,0x30]
-    v64x2.load_splat 48
+    # CHECK: v128.load64_splat 48 # encoding: [0xfd,0x0a,0x03,0x30]
+    v128.load64_splat 48
 
     # CHECK: v128.store 48 # encoding: [0xfd,0x0b,0x04,0x30]
     v128.store 48
@@ -66,15 +66,15 @@ main:
     # CHECK-SAME: 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f]
     v128.const 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783
 
-    # CHECK: v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23,
+    # CHECK: i8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23,
     # CHECK-SAME: 8, 25, 10, 27, 12, 29, 14, 31
     # CHECK-SAME: # encoding: [0xfd,0x0d,
     # CHECK-SAME: 0x00,0x11,0x02,0x13,0x04,0x15,0x06,0x17,
     # CHECK-SAME: 0x08,0x19,0x0a,0x1b,0x0c,0x1d,0x0e,0x1f]
-    v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+    i8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
 
-    # CHECK: v8x16.swizzle # encoding: [0xfd,0x0e]
-    v8x16.swizzle
+    # CHECK: i8x16.swizzle # encoding: [0xfd,0x0e]
+    i8x16.swizzle
 
     # CHECK: i8x16.splat # encoding: [0xfd,0x0f]
     i8x16.splat

From 8de43b926f0e960bbc5b6a53d1b613c46b7c774b Mon Sep 17 00:00:00 2001
From: Alex Zinenko 
Date: Tue, 22 Dec 2020 11:22:21 +0100
Subject: [PATCH 134/378] [mlir] Remove instance methods from LLVMType

LLVMType contains multiple instance methods that were introduced initially
for compatibility with LLVM API. These methods boil down to `cast` followed
by a type-specific call. Arguably, they are mostly used in an LLVM
cast-follows-isa anti-pattern. This doesn't connect nicely to the rest of the
MLIR infrastructure and actively prevents it from making the LLVM dialect
type system more open, e.g., reusing built-in types when appropriate.

Remove such instance methods and replace their uses with appropriate casts
and methods on derived classes. In some cases, the result may look slightly
more verbose, but most cases should actually use a stricter subtype of
LLVMType anyway and avoid the isa/cast.
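Not part of the original patch: a minimal standalone sketch of the migration
pattern this commit applies, assuming the December 2020 MLIR API used in this
series (the LLVM::LLVMType hierarchy, before later renames). The helper name
pointeeOrSelf and the main() driver are invented here for illustration only;
the calls themselves (dyn_cast to the derived type, getElementType(),
LLVMPointerType::get) mirror the replacements made in the hunks below.

    // Sketch only; not from the patch. Assumes MLIR as of Dec 2020.
    #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
    #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
    #include "mlir/IR/MLIRContext.h"
    #include <cassert>

    using namespace mlir;

    // Old style (removed by this patch):
    //   if (ty.isPointerTy()) return ty.getPointerElementTy();
    // New style: cast to the derived type and use its own accessor.
    static LLVM::LLVMType pointeeOrSelf(LLVM::LLVMType ty) {
      if (auto ptrTy = ty.dyn_cast<LLVM::LLVMPointerType>())
        return ptrTy.getElementType();
      return ty;
    }

    int main() {
      MLIRContext context;
      context.loadDialect<LLVM::LLVMDialect>();

      // Building a pointer type now goes through the static factory rather
      // than the removed i32Ty.getPointerTo() helper.
      LLVM::LLVMType i32Ty = LLVM::LLVMType::getInt32Ty(&context);
      auto ptrTy = LLVM::LLVMPointerType::get(i32Ty);

      assert(pointeeOrSelf(ptrTy) == i32Ty && "expected the pointee type");
      return 0;
    }

The same pattern covers the other removed helpers, e.g. isStructTy() plus
getStructElementType(i) becoming a dyn_cast to LLVMStructType followed by
getBody()[i], as the hunks below show.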
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D93680 --- .../StandardToLLVM/ConvertStandardToLLVM.h | 3 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 28 ++- mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h | 75 ++----- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 9 +- .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 10 +- .../ConvertLaunchFuncToRuntimeCalls.cpp | 15 +- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 18 +- .../GPUCommon/OpToFuncCallLowering.h | 9 +- .../ConvertLaunchFuncToVulkanCalls.cpp | 45 ++-- .../SPIRVToLLVM/ConvertSPIRVToLLVM.cpp | 8 +- .../StandardToLLVM/StandardToLLVM.cpp | 159 +++++++------ .../VectorToLLVM/ConvertVectorToLLVM.cpp | 22 +- .../VectorToROCDL/VectorToROCDL.cpp | 6 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 211 +++++++++--------- mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 170 ++++---------- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 5 +- mlir/lib/ExecutionEngine/JitRunner.cpp | 19 +- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 45 ++-- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 4 +- mlir/test/Dialect/LLVMIR/invalid.mlir | 8 +- 20 files changed, 402 insertions(+), 467 deletions(-) diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 7c069c9cd5566..63ff16a84ab87 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -446,7 +446,8 @@ class UnrankedMemRefDescriptor : public StructBuilder { /// Builds IR extracting the pointer to the first element of the size array. static Value sizeBasePtr(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, - Value memRefDescPtr, LLVM::LLVMType elemPtrPtrType); + Value memRefDescPtr, + LLVM::LLVMPointerType elemPtrPtrType); /// Builds IR extracting the size[index] from the descriptor. 
static Value size(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value sizeBasePtr, diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index df022ef47b331..552fe15e68997 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -51,7 +51,7 @@ def LLVM_VoidResultTypeOpBuilder : [{ auto llvmType = resultType.dyn_cast(); (void)llvmType; assert(llvmType && "result must be an LLVM type"); - assert(llvmType.isVoidTy() && + assert(llvmType.isa() && "for zero-result operands, only 'void' is accepted as result type"); build($_builder, $_state, operands, attributes); }]>; @@ -288,7 +288,7 @@ def LLVM_LoadOp : LLVM_Op<"load">, MemoryOpWithAlignmentAndAttributes { OpBuilderDAG<(ins "Value":$addr, CArg<"unsigned", "0">:$alignment, CArg<"bool", "false">:$isVolatile, CArg<"bool", "false">:$isNonTemporal), [{ - auto type = addr.getType().cast().getPointerElementTy(); + auto type = addr.getType().cast().getElementType(); build($_builder, $_state, type, addr, alignment, isVolatile, isNonTemporal); }]>, OpBuilderDAG<(ins "Type":$t, "Value":$addr, @@ -443,8 +443,8 @@ def LLVM_CallOp : LLVM_Op<"call"> { OpBuilderDAG<(ins "LLVMFuncOp":$func, "ValueRange":$operands, CArg<"ArrayRef", "{}">:$attributes), [{ - LLVMType resultType = func.getType().getFunctionResultType(); - if (!resultType.isVoidTy()) + LLVMType resultType = func.getType().getReturnType(); + if (!resultType.isa()) $_state.addTypes(resultType); $_state.addAttribute("callee", $_builder.getSymbolRefAttr(func)); $_state.addAttributes(attributes); @@ -515,12 +515,10 @@ def LLVM_ShuffleVectorOp : LLVM_Op<"shufflevector", [NoSideEffect]> { OpBuilderDAG<(ins "Value":$v1, "Value":$v2, "ArrayAttr":$mask, CArg<"ArrayRef", "{}">:$attrs)>]; let verifier = [{ - auto wrappedVectorType1 = v1().getType().cast(); - auto wrappedVectorType2 = v2().getType().cast(); - if (!wrappedVectorType2.isVectorTy()) - return emitOpError("expected LLVM IR Dialect vector type for operand #2"); - if (wrappedVectorType1.getVectorElementType() != - wrappedVectorType2.getVectorElementType()) + auto wrappedVectorType1 = v1().getType().cast(); + auto wrappedVectorType2 = v2().getType().cast(); + if (wrappedVectorType1.getElementType() != + wrappedVectorType2.getElementType()) return emitOpError("expected matching LLVM IR Dialect element types"); return success(); }]; @@ -768,13 +766,13 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof"> { CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - global.getType().getPointerTo(global.addr_space()), + LLVM::LLVMPointerType::get(global.getType(), global.addr_space()), global.sym_name(), attrs);}]>, OpBuilderDAG<(ins "LLVMFuncOp":$func, CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - func.getType().getPointerTo(), func.getName(), attrs);}]> + LLVM::LLVMPointerType::get(func.getType()), func.getName(), attrs);}]> ]; let extraClassDeclaration = [{ @@ -970,12 +968,12 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", // to match the signature of the function. Block *addEntryBlock(); - LLVMType getType() { + LLVMFunctionType getType() { return (*this)->getAttrOfType(getTypeAttrName()) - .getValue().cast(); + .getValue().cast(); } bool isVarArg() { - return getType().isFunctionVarArg(); + return getType().isVarArg(); } // Hook for OpTrait::FunctionLike, returns the number of function arguments`. 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index f92bdf9e3041a..e1938c12c809e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -80,58 +80,6 @@ class LLVMType : public Type { LLVMDialect &getDialect(); - /// Returns the size of a primitive type (including vectors) in bits, for - /// example, the size of !llvm.i16 is 16 and the size of !llvm.vec<4 x i16> - /// is 64. Returns 0 for non-primitive (aggregates such as struct) or types - /// that don't have a size (such as void). - llvm::TypeSize getPrimitiveSizeInBits(); - - /// Floating-point type utilities. - bool isBFloatTy() { return isa(); } - bool isHalfTy() { return isa(); } - bool isFloatTy() { return isa(); } - bool isDoubleTy() { return isa(); } - bool isFP128Ty() { return isa(); } - bool isX86_FP80Ty() { return isa(); } - bool isFloatingPointTy() { - return isa() || isa() || - isa() || isa() || - isa() || isa(); - } - - /// Array type utilities. - LLVMType getArrayElementType(); - unsigned getArrayNumElements(); - bool isArrayTy(); - - /// Integer type utilities. - bool isIntegerTy() { return isa(); } - bool isIntegerTy(unsigned bitwidth); - unsigned getIntegerBitWidth(); - - /// Vector type utilities. - LLVMType getVectorElementType(); - unsigned getVectorNumElements(); - llvm::ElementCount getVectorElementCount(); - bool isVectorTy(); - - /// Function type utilities. - LLVMType getFunctionParamType(unsigned argIdx); - unsigned getFunctionNumParams(); - LLVMType getFunctionResultType(); - bool isFunctionTy(); - bool isFunctionVarArg(); - - /// Pointer type utilities. - LLVMType getPointerTo(unsigned addrSpace = 0); - LLVMType getPointerElementTy(); - bool isPointerTy(); - - /// Struct type utilities. - LLVMType getStructElementType(unsigned i); - unsigned getStructNumElements(); - bool isStructTy(); - /// Utilities used to generate floating point types. static LLVMType getDoubleTy(MLIRContext *context); static LLVMType getFloatTy(MLIRContext *context); @@ -148,9 +96,7 @@ class LLVMType : public Type { static LLVMType getInt8Ty(MLIRContext *context) { return getIntNTy(context, /*numBits=*/8); } - static LLVMType getInt8PtrTy(MLIRContext *context) { - return getInt8Ty(context).getPointerTo(); - } + static LLVMType getInt8PtrTy(MLIRContext *context); static LLVMType getInt16Ty(MLIRContext *context) { return getIntNTy(context, /*numBits=*/16); } @@ -184,7 +130,6 @@ class LLVMType : public Type { /// Void type utilities. static LLVMType getVoidTy(MLIRContext *context); - bool isVoidTy(); // Creation and setting of LLVM's identified struct types static LLVMType createStructTy(MLIRContext *context, @@ -585,6 +530,24 @@ LLVMType parseType(DialectAsmParser &parser); void printType(LLVMType type, DialectAsmPrinter &printer); } // namespace detail +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +/// Returns `true` if the given type is compatible with the LLVM dialect. +inline bool isCompatibleType(Type type) { return type.isa(); } + +inline bool isCompatibleFloatingPointType(Type type) { + return type.isa(); +} + +/// Returns the size of the given primitive LLVM dialect-compatible type +/// (including vectors) in bits, for example, the size of !llvm.i16 is 16 and +/// the size of !llvm.vec<4 x i16> is 64. 
Returns 0 for non-primitive +/// (aggregates such as struct) or types that don't have a size (such as void). +llvm::TypeSize getPrimitiveTypeSizeInBits(Type type); + } // namespace LLVM } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 1f9b860eb52eb..3c73cdf64eb70 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -109,10 +109,11 @@ def NVVM_ShflBflyOp : let verifier = [{ if (!(*this)->getAttrOfType("return_value_and_is_valid")) return success(); - auto type = getType().cast(); - if (!type.isStructTy() || type.getStructNumElements() != 2 || - !type.getStructElementType(1).isIntegerTy( - /*Bitwidth=*/1)) + auto type = getType().dyn_cast(); + auto elementType = (type && type.getBody().size() == 2) + ? type.getBody()[1].dyn_cast() + : nullptr; + if (!elementType || elementType.getBitWidth() != 1) return emitError("expected return type to be a two-element struct with " "i1 as the second element"); return success(); diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 273754fe2480c..65545d8ab2de1 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -79,7 +79,7 @@ struct AsyncAPI { static FunctionType executeFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {hdl, resume}, {}); } @@ -91,13 +91,13 @@ struct AsyncAPI { static FunctionType awaitAndExecuteFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {TokenType::get(ctx), hdl, resume}, {}); } static FunctionType awaitAllAndExecuteFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {GroupType::get(ctx), hdl, resume}, {}); } @@ -507,7 +507,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, resumeFnTy.getPointerTo(), kResume); + loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( @@ -750,7 +750,7 @@ class AwaitOpLoweringBase : public ConversionPattern { // A pointer to coroutine resume intrinsic wrapper. 
auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, resumeFnTy.getPointerTo(), kResume); + loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 41a079c44eea5..bbb2bf1e04ff2 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -55,14 +55,14 @@ class FunctionCallBuilder { FunctionCallBuilder(StringRef functionName, LLVM::LLVMType returnType, ArrayRef argumentTypes) : functionName(functionName), - functionType(LLVM::LLVMType::getFunctionTy(returnType, argumentTypes, - /*isVarArg=*/false)) {} + functionType(LLVM::LLVMFunctionType::get(returnType, argumentTypes, + /*isVarArg=*/false)) {} LLVM::CallOp create(Location loc, OpBuilder &builder, ArrayRef arguments) const; private: StringRef functionName; - LLVM::LLVMType functionType; + LLVM::LLVMFunctionType functionType; }; template @@ -76,7 +76,8 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { LLVM::LLVMType llvmVoidType = LLVM::LLVMType::getVoidTy(context); LLVM::LLVMType llvmPointerType = LLVM::LLVMType::getInt8PtrTy(context); - LLVM::LLVMType llvmPointerPointerType = llvmPointerType.getPointerTo(); + LLVM::LLVMType llvmPointerPointerType = + LLVM::LLVMPointerType::get(llvmPointerType); LLVM::LLVMType llvmInt8Type = LLVM::LLVMType::getInt8Ty(context); LLVM::LLVMType llvmInt32Type = LLVM::LLVMType::getInt32Ty(context); LLVM::LLVMType llvmInt64Type = LLVM::LLVMType::getInt64Ty(context); @@ -312,7 +313,7 @@ LLVM::CallOp FunctionCallBuilder::create(Location loc, OpBuilder &builder, .create(loc, functionName, functionType); }(); return builder.create( - loc, const_cast(functionType).getFunctionResultType(), + loc, const_cast(functionType).getReturnType(), builder.getSymbolRefAttr(function), arguments); } @@ -518,7 +519,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto one = builder.create(loc, llvmInt32Type, builder.getI32IntegerAttr(1)); auto structPtr = builder.create( - loc, structType.getPointerTo(), one, /*alignment=*/0); + loc, LLVM::LLVMPointerType::get(structType), one, /*alignment=*/0); auto arraySize = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(numArguments)); auto arrayPtr = builder.create(loc, llvmPointerPointerType, @@ -529,7 +530,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto index = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(en.index())); auto fieldPtr = builder.create( - loc, argumentTypes[en.index()].getPointerTo(), structPtr, + loc, LLVM::LLVMPointerType::get(argumentTypes[en.index()]), structPtr, ArrayRef{zero, index.getResult()}); builder.create(loc, en.value(), fieldPtr); auto elementPtr = builder.create(loc, llvmPointerPointerType, diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index bf17200e594f1..914b7ee50cf93 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -51,8 +51,8 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Rewrite the original GPU function to an LLVM function. 
auto funcType = typeConverter->convertType(gpuFuncOp.getType()) - .template cast() - .getPointerElementTy(); + .template cast() + .getElementType(); // Remap proper input types. TypeConverter::SignatureConversion signatureConversion( @@ -94,10 +94,11 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { for (auto en : llvm::enumerate(workgroupBuffers)) { LLVM::GlobalOp global = en.value(); Value address = rewriter.create(loc, global); - auto elementType = global.getType().getArrayElementType(); + auto elementType = + global.getType().cast().getElementType(); Value memory = rewriter.create( - loc, elementType.getPointerTo(global.addr_space()), address, - ArrayRef{zero, zero}); + loc, LLVM::LLVMPointerType::get(elementType, global.addr_space()), + address, ArrayRef{zero, zero}); // Build a memref descriptor pointing to the buffer to plug with the // existing memref infrastructure. This may use more registers than @@ -123,9 +124,10 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Explicitly drop memory space when lowering private memory // attributions since NVVM models it as `alloca`s in the default // memory space and does not support `alloca`s with addrspace(5). - auto ptrType = typeConverter->convertType(type.getElementType()) - .template cast() - .getPointerTo(AllocaAddrSpace); + auto ptrType = LLVM::LLVMPointerType::get( + typeConverter->convertType(type.getElementType()) + .template cast(), + AllocaAddrSpace); Value numElements = rewriter.create( gpuFuncOp.getLoc(), int64Ty, rewriter.getI64IntegerAttr(type.getNumElements())); diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h index 9d08aeee19061..b2887aa1d7829 100644 --- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h +++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h @@ -57,7 +57,8 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { LLVMType resultType = castedOperands.front().getType().cast(); LLVMType funcType = getFunctionType(resultType, castedOperands); - StringRef funcName = getFunctionName(funcType.getFunctionResultType()); + StringRef funcName = getFunctionName( + funcType.cast().getReturnType()); if (funcName.empty()) return failure(); @@ -80,7 +81,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { private: Value maybeCast(Value operand, PatternRewriter &rewriter) const { LLVM::LLVMType type = operand.getType().cast(); - if (!type.isHalfTy()) + if (!type.isa()) return operand; return rewriter.create( @@ -100,9 +101,9 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { } StringRef getFunctionName(LLVM::LLVMType type) const { - if (type.isFloatTy()) + if (type.isa()) return f32Func; - if (type.isDoubleTy()) + if (type.isa()) return f64Func; return ""; } diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp index 355bced96ae75..c676cd256d66a 100644 --- a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp +++ b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp @@ -75,7 +75,7 @@ class VulkanLaunchFuncToVulkanCallsPass // int64_t sizes[Rank]; // omitted when rank == 0 // int64_t strides[Rank]; // omitted when rank == 0 // }; - auto llvmPtrToElementType = elemenType.getPointerTo(); + auto llvmPtrToElementType = LLVM::LLVMPointerType::get(elemenType); auto llvmArrayRankElementSizeType = LLVM::LLVMType::getArrayTy(getInt64Type(), rank); @@ -131,16 +131,18 @@ class 
VulkanLaunchFuncToVulkanCallsPass /// Returns a string representation from the given `type`. StringRef stringifyType(LLVM::LLVMType type) { - if (type.isFloatTy()) + if (type.isa()) return "Float"; - if (type.isHalfTy()) + if (type.isa()) return "Half"; - if (type.isIntegerTy(32)) - return "Int32"; - if (type.isIntegerTy(16)) - return "Int16"; - if (type.isIntegerTy(8)) - return "Int8"; + if (auto intType = type.dyn_cast()) { + if (intType.getBitWidth() == 32) + return "Int32"; + if (intType.getBitWidth() == 16) + return "Int16"; + if (intType.getBitWidth() == 8) + return "Int8"; + } llvm_unreachable("unsupported type"); } @@ -238,11 +240,11 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( llvm::formatv("bindMemRef{0}D{1}", rank, stringifyType(type)).str(); // Special case for fp16 type. Since it is not a supported type in C we use // int16_t and bitcast the descriptor. - if (type.isHalfTy()) { + if (type.isa()) { auto memRefTy = getMemRefType(rank, LLVM::LLVMType::getInt16Ty(&getContext())); ptrToMemRefDescriptor = builder.create( - loc, memRefTy.getPointerTo(), ptrToMemRefDescriptor); + loc, LLVM::LLVMPointerType::get(memRefTy), ptrToMemRefDescriptor); } // Create call to `bindMemRef`. builder.create( @@ -257,11 +259,12 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( LogicalResult VulkanLaunchFuncToVulkanCallsPass::deduceMemRefRankAndType( Value ptrToMemRefDescriptor, uint32_t &rank, LLVM::LLVMType &type) { auto llvmPtrDescriptorTy = - ptrToMemRefDescriptor.getType().dyn_cast(); + ptrToMemRefDescriptor.getType().dyn_cast(); if (!llvmPtrDescriptorTy) return failure(); - auto llvmDescriptorTy = llvmPtrDescriptorTy.getPointerElementTy(); + auto llvmDescriptorTy = + llvmPtrDescriptorTy.getElementType().dyn_cast(); // template // struct { // Elem *allocated; @@ -270,15 +273,19 @@ LogicalResult VulkanLaunchFuncToVulkanCallsPass::deduceMemRefRankAndType( // int64_t sizes[Rank]; // omitted when rank == 0 // int64_t strides[Rank]; // omitted when rank == 0 // }; - if (!llvmDescriptorTy || !llvmDescriptorTy.isStructTy()) + if (!llvmDescriptorTy) return failure(); - type = llvmDescriptorTy.getStructElementType(0).getPointerElementTy(); - if (llvmDescriptorTy.getStructNumElements() == 3) { + type = llvmDescriptorTy.getBody()[0] + .cast() + .getElementType(); + if (llvmDescriptorTy.getBody().size() == 3) { rank = 0; return success(); } - rank = llvmDescriptorTy.getStructElementType(3).getArrayNumElements(); + rank = llvmDescriptorTy.getBody()[3] + .cast() + .getNumElements(); return success(); } @@ -326,13 +333,13 @@ void VulkanLaunchFuncToVulkanCallsPass::declareVulkanFunctions(Location loc) { LLVM::LLVMType::getHalfTy(&getContext())}) { std::string fnName = "bindMemRef" + std::to_string(i) + "D" + std::string(stringifyType(type)); - if (type.isHalfTy()) + if (type.isa()) type = LLVM::LLVMType::getInt16Ty(&getContext()); if (!module.lookupSymbol(fnName)) { auto fnType = LLVM::LLVMType::getFunctionTy( getVoidType(), {getPointerType(), getInt32Type(), getInt32Type(), - getMemRefType(i, type).getPointerTo()}, + LLVM::LLVMPointerType::get(getMemRefType(i, type))}, /*isVarArg=*/false); builder.create(loc, fnName, fnType); } diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp index cacb4787edd4e..7da9c47f92199 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp @@ -66,8 +66,10 @@ static unsigned getBitWidth(Type type) { /// 
Returns the bit width of LLVMType integer or vector. static unsigned getLLVMTypeBitWidth(LLVM::LLVMType type) { - return type.isVectorTy() ? type.getVectorElementType().getIntegerBitWidth() - : type.getIntegerBitWidth(); + auto vectorType = type.dyn_cast(); + return (vectorType ? vectorType.getElementType() : type) + .cast() + .getBitWidth(); } /// Creates `IntegerAttribute` with all bits set for given type @@ -265,7 +267,7 @@ static Type convertPointerType(spirv::PointerType type, TypeConverter &converter) { auto pointeeType = converter.convertType(type.getPointeeType()).cast(); - return pointeeType.getPointerTo(); + return LLVM::LLVMPointerType::get(pointeeType); } /// Converts SPIR-V runtime array to LLVM array. Since LLVM allows indexing over diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 6fbcc220a86b0..e37e7e2dc0c11 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -215,7 +215,7 @@ Type LLVMTypeConverter::convertFunctionType(FunctionType type) { SignatureConversion conversion(type.getNumInputs()); LLVM::LLVMType converted = convertFunctionSignature(type, /*isVariadic=*/false, conversion); - return converted.getPointerTo(); + return LLVM::LLVMPointerType::get(converted); } @@ -267,7 +267,7 @@ LLVMTypeConverter::convertFunctionTypeCWrapper(FunctionType type) { if (!converted) return {}; if (t.isa()) - converted = converted.getPointerTo(); + converted = LLVM::LLVMPointerType::get(converted); inputs.push_back(converted); } @@ -324,7 +324,7 @@ LLVMTypeConverter::getMemRefDescriptorFields(MemRefType type, LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - auto ptrTy = elementType.getPointerTo(type.getMemorySpace()); + auto ptrTy = LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); auto indexTy = getIndexType(); SmallVector results = {ptrTy, ptrTy, indexTy}; @@ -396,7 +396,7 @@ Type LLVMTypeConverter::convertMemRefToBarePtr(BaseMemRefType type) { LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - return elementType.getPointerTo(type.getMemorySpace()); + return LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); } // Convert an n-D vector type to an LLVM vector type via (n-1)-D array type when @@ -460,7 +460,7 @@ StructBuilder::StructBuilder(Value v) : value(v) { Value StructBuilder::extractPtr(OpBuilder &builder, Location loc, unsigned pos) { - Type type = structType.cast().getStructElementType(pos); + Type type = structType.cast().getBody()[pos]; return builder.create(loc, type, value, builder.getI64ArrayAttr(pos)); } @@ -507,8 +507,9 @@ Value ComplexStructBuilder::imaginary(OpBuilder &builder, Location loc) { MemRefDescriptor::MemRefDescriptor(Value descriptor) : StructBuilder(descriptor) { assert(value != nullptr && "value cannot be null"); - indexType = value.getType().cast().getStructElementType( - kOffsetPosInMemRefDescriptor); + indexType = value.getType() + .cast() + .getBody()[kOffsetPosInMemRefDescriptor]; } /// Builds IR creating an `undef` value of the descriptor type. 
@@ -618,9 +619,9 @@ Value MemRefDescriptor::size(OpBuilder &builder, Location loc, unsigned pos) { Value MemRefDescriptor::size(OpBuilder &builder, Location loc, Value pos, int64_t rank) { auto indexTy = indexType.cast(); - auto indexPtrTy = indexTy.getPointerTo(); + auto indexPtrTy = LLVM::LLVMPointerType::get(indexTy); auto arrayTy = LLVM::LLVMType::getArrayTy(indexTy, rank); - auto arrayPtrTy = arrayTy.getPointerTo(); + auto arrayPtrTy = LLVM::LLVMPointerType::get(arrayTy); // Copy size values to stack-allocated memory. auto zero = createIndexAttrConstant(builder, loc, indexType, 0); @@ -675,8 +676,8 @@ void MemRefDescriptor::setConstantStride(OpBuilder &builder, Location loc, LLVM::LLVMPointerType MemRefDescriptor::getElementPtrType() { return value.getType() - .cast() - .getStructElementType(kAlignedPtrPosInMemRefDescriptor) + .cast() + .getBody()[kAlignedPtrPosInMemRefDescriptor] .cast(); } @@ -922,7 +923,7 @@ Value UnrankedMemRefDescriptor::offset(OpBuilder &builder, Location loc, Value offsetGep = builder.create( loc, elemPtrPtrType, elementPtrPtr, ValueRange({two})); offsetGep = builder.create( - loc, typeConverter.getIndexType().getPointerTo(), offsetGep); + loc, LLVM::LLVMPointerType::get(typeConverter.getIndexType()), offsetGep); return builder.create(loc, offsetGep); } @@ -939,19 +940,17 @@ void UnrankedMemRefDescriptor::setOffset(OpBuilder &builder, Location loc, Value offsetGep = builder.create( loc, elemPtrPtrType, elementPtrPtr, ValueRange({two})); offsetGep = builder.create( - loc, typeConverter.getIndexType().getPointerTo(), offsetGep); + loc, LLVM::LLVMPointerType::get(typeConverter.getIndexType()), offsetGep); builder.create(loc, offset, offsetGep); } -Value UnrankedMemRefDescriptor::sizeBasePtr(OpBuilder &builder, Location loc, - LLVMTypeConverter &typeConverter, - Value memRefDescPtr, - LLVM::LLVMType elemPtrPtrType) { - LLVM::LLVMType elemPtrTy = elemPtrPtrType.getPointerElementTy(); +Value UnrankedMemRefDescriptor::sizeBasePtr( + OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, + Value memRefDescPtr, LLVM::LLVMPointerType elemPtrPtrType) { + LLVM::LLVMType elemPtrTy = elemPtrPtrType.getElementType(); LLVM::LLVMType indexTy = typeConverter.getIndexType(); - LLVM::LLVMType structPtrTy = - LLVM::LLVMType::getStructTy(elemPtrTy, elemPtrTy, indexTy, indexTy) - .getPointerTo(); + LLVM::LLVMType structPtrTy = LLVM::LLVMPointerType::get( + LLVM::LLVMType::getStructTy(elemPtrTy, elemPtrTy, indexTy, indexTy)); Value structPtr = builder.create(loc, structPtrTy, memRefDescPtr); @@ -961,14 +960,15 @@ Value UnrankedMemRefDescriptor::sizeBasePtr(OpBuilder &builder, Location loc, createIndexAttrConstant(builder, loc, typeConverter.getIndexType(), 0); Value three = builder.create(loc, int32_type, builder.getI32IntegerAttr(3)); - return builder.create(loc, indexTy.getPointerTo(), structPtr, - ValueRange({zero, three})); + return builder.create(loc, LLVM::LLVMPointerType::get(indexTy), + structPtr, ValueRange({zero, three})); } Value UnrankedMemRefDescriptor::size(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value sizeBasePtr, Value index) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value sizeStoreGep = builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({index})); return builder.create(loc, sizeStoreGep); @@ -978,7 +978,8 @@ void UnrankedMemRefDescriptor::setSize(OpBuilder &builder, Location loc, LLVMTypeConverter 
typeConverter, Value sizeBasePtr, Value index, Value size) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value sizeStoreGep = builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({index})); builder.create(loc, size, sizeStoreGep); @@ -987,7 +988,8 @@ void UnrankedMemRefDescriptor::setSize(OpBuilder &builder, Location loc, Value UnrankedMemRefDescriptor::strideBasePtr(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, Value sizeBasePtr, Value rank) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); return builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({rank})); } @@ -996,7 +998,8 @@ Value UnrankedMemRefDescriptor::stride(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value strideBasePtr, Value index, Value stride) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value strideStoreGep = builder.create( loc, indexPtrTy, strideBasePtr, ValueRange({index})); return builder.create(loc, strideStoreGep); @@ -1006,7 +1009,8 @@ void UnrankedMemRefDescriptor::setStride(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value strideBasePtr, Value index, Value stride) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value strideStoreGep = builder.create( loc, indexPtrTy, strideBasePtr, ValueRange({index})); builder.create(loc, stride, strideStoreGep); @@ -1100,7 +1104,7 @@ bool ConvertToLLVMPattern::isSupportedMemRefType(MemRefType type) const { Type ConvertToLLVMPattern::getElementPtrType(MemRefType type) const { auto elementType = type.getElementType(); auto structElementType = unwrap(typeConverter->convertType(elementType)); - return structElementType.getPointerTo(type.getMemorySpace()); + return LLVM::LLVMPointerType::get(structElementType, type.getMemorySpace()); } void ConvertToLLVMPattern::getMemRefDescriptorSizes( @@ -1158,8 +1162,8 @@ Value ConvertToLLVMPattern::getSizeInBytes( // %0 = getelementptr %elementType* null, %indexType 1 // %1 = ptrtoint %elementType* %0 to %indexType // which is a common pattern of getting the size of a type in bytes. 
- auto convertedPtrType = - typeConverter->convertType(type).cast().getPointerTo(); + auto convertedPtrType = LLVM::LLVMPointerType::get( + typeConverter->convertType(type).cast()); auto nullPtr = rewriter.create(loc, convertedPtrType); auto gep = rewriter.create( loc, convertedPtrType, @@ -1315,7 +1319,8 @@ static void wrapExternalFunction(OpBuilder &builder, Location loc, builder, loc, typeConverter, unrankedMemRefType, wrapperArgsRange.take_front(numToDrop)); - auto ptrTy = packed.getType().cast().getPointerTo(); + auto ptrTy = + LLVM::LLVMPointerType::get(packed.getType().cast()); Value one = builder.create( loc, typeConverter.convertType(builder.getIndexType()), builder.getIntegerAttr(builder.getIndexType(), 1)); @@ -1512,11 +1517,12 @@ static NDVectorTypeInfo extractNDVectorTypeInfo(VectorType vectorType, return info; info.arraySizes.reserve(vectorType.getRank() - 1); auto llvmTy = info.llvmArrayTy; - while (llvmTy.isArrayTy()) { - info.arraySizes.push_back(llvmTy.getArrayNumElements()); - llvmTy = llvmTy.getArrayElementType(); + while (llvmTy.isa()) { + info.arraySizes.push_back( + llvmTy.cast().getNumElements()); + llvmTy = llvmTy.cast().getElementType(); } - if (!llvmTy.isVectorTy()) + if (!llvmTy.isa()) return info; info.llvmVectorTy = llvmTy; return info; @@ -1644,7 +1650,7 @@ LogicalResult LLVM::detail::vectorOneToOneRewrite( return failure(); auto llvmArrayTy = operands[0].getType().cast(); - if (!llvmArrayTy.isArrayTy()) + if (!llvmArrayTy.isa()) return oneToOneRewrite(op, targetOp, operands, typeConverter, rewriter); auto callback = [op, targetOp, &rewriter](LLVM::LLVMType llvmVectorTy, @@ -2457,13 +2463,14 @@ struct GetGlobalMemrefOpLowering : public AllocLikeOpLowering { LLVM::LLVMType arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter()); auto addressOf = rewriter.create( - loc, arrayTy.getPointerTo(memSpace), getGlobalOp.name()); + loc, LLVM::LLVMPointerType::get(arrayTy, memSpace), getGlobalOp.name()); // Get the address of the first element in the array by creating a GEP with // the address of the GV as the base, and (rank + 1) number of 0 indices. 
LLVM::LLVMType elementType = unwrap(typeConverter->convertType(type.getElementType())); - LLVM::LLVMType elementPtrType = elementType.getPointerTo(memSpace); + LLVM::LLVMType elementPtrType = + LLVM::LLVMPointerType::get(elementType, memSpace); SmallVector operands = {addressOf}; operands.insert(operands.end(), type.getRank() + 1, @@ -2504,9 +2511,9 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); - if (!operandType.isArrayTy()) { + if (!operandType.isa()) { LLVM::ConstantOp one; - if (operandType.isVectorTy()) { + if (operandType.isa()) { one = rewriter.create( loc, operandType, SplatElementsAttr::get(resultType.cast(), floatOne)); @@ -2526,8 +2533,10 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { op.getOperation(), operands, *getTypeConverter(), [&](LLVM::LLVMType llvmVectorTy, ValueRange operands) { auto splatAttr = SplatElementsAttr::get( - mlir::VectorType::get({llvmVectorTy.getVectorNumElements()}, - floatType), + mlir::VectorType::get( + {llvmVectorTy.cast() + .getNumElements()}, + floatType), floatOne); auto one = rewriter.create(loc, llvmVectorTy, splatAttr); @@ -2614,12 +2623,13 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { // ptr = ExtractValueOp src, 1 auto ptr = memRefDesc.memRefDescPtr(rewriter, loc); // castPtr = BitCastOp i8* to structTy* - auto castPtr = - rewriter - .create( - loc, targetStructType.cast().getPointerTo(), - ptr) - .getResult(); + auto castPtr = rewriter + .create( + loc, + LLVM::LLVMPointerType::get( + targetStructType.cast()), + ptr) + .getResult(); // struct = LoadOp castPtr auto loadOp = rewriter.create(loc, castPtr); rewriter.replaceOp(memRefCastOp, loadOp.getResult()); @@ -2654,8 +2664,8 @@ static void extractPointersAndOffset(Location loc, Type elementType = operandType.cast().getElementType(); LLVM::LLVMType llvmElementType = unwrap(typeConverter.convertType(elementType)); - LLVM::LLVMType elementPtrPtrType = - llvmElementType.getPointerTo(memorySpace).getPointerTo(); + LLVM::LLVMType elementPtrPtrType = LLVM::LLVMPointerType::get( + LLVM::LLVMPointerType::get(llvmElementType, memorySpace)); // Extract pointer to the underlying ranked memref descriptor and cast it to // ElemType**. @@ -2700,8 +2710,8 @@ struct MemRefReinterpretCastOpLowering MemRefType targetMemRefType = castOp.getResult().getType().cast(); auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType) - .dyn_cast_or_null(); - if (!llvmTargetDescriptorTy || !llvmTargetDescriptorTy.isStructTy()) + .dyn_cast_or_null(); + if (!llvmTargetDescriptorTy) return failure(); // Create descriptor. @@ -2804,8 +2814,8 @@ struct MemRefReshapeOpLowering // Set pointers and offset. LLVM::LLVMType llvmElementType = unwrap(typeConverter->convertType(elementType)); - LLVM::LLVMType elementPtrPtrType = - llvmElementType.getPointerTo(addressSpace).getPointerTo(); + auto elementPtrPtrType = LLVM::LLVMPointerType::get( + LLVM::LLVMPointerType::get(llvmElementType, addressSpace)); UnrankedMemRefDescriptor::setAllocatedPtr(rewriter, loc, underlyingDescPtr, elementPtrPtrType, allocatedPtr); UnrankedMemRefDescriptor::setAlignedPtr(rewriter, loc, *getTypeConverter(), @@ -2858,7 +2868,7 @@ struct MemRefReshapeOpLowering rewriter.setInsertionPointToStart(bodyBlock); // Copy size from shape to descriptor. 
- LLVM::LLVMType llvmIndexPtrType = indexType.getPointerTo(); + LLVM::LLVMType llvmIndexPtrType = LLVM::LLVMPointerType::get(indexType); Value sizeLoadGep = rewriter.create( loc, llvmIndexPtrType, shapeOperandPtr, ValueRange{indexArg}); Value size = rewriter.create(loc, sizeLoadGep); @@ -2950,14 +2960,14 @@ struct DimOpLowering : public ConvertOpToLLVMPattern { Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc); Value scalarMemRefDescPtr = rewriter.create( loc, - typeConverter->convertType(scalarMemRefType) - .cast() - .getPointerTo(addressSpace), + LLVM::LLVMPointerType::get( + typeConverter->convertType(scalarMemRefType).cast(), + addressSpace), underlyingRankedDesc); // Get pointer to offset field of memref descriptor. - Type indexPtrTy = - getTypeConverter()->getIndexType().getPointerTo(addressSpace); + Type indexPtrTy = LLVM::LLVMPointerType::get( + getTypeConverter()->getIndexType(), addressSpace); Value two = rewriter.create( loc, typeConverter->convertType(rewriter.getI32Type()), rewriter.getI32IntegerAttr(2)); @@ -3120,10 +3130,10 @@ struct IndexCastOpLowering : public ConvertOpToLLVMPattern { auto targetType = typeConverter->convertType(indexCastOp.getResult().getType()) - .cast(); - auto sourceType = transformed.in().getType().cast(); - unsigned targetBits = targetType.getIntegerBitWidth(); - unsigned sourceBits = sourceType.getIntegerBitWidth(); + .cast(); + auto sourceType = transformed.in().getType().cast(); + unsigned targetBits = targetType.getBitWidth(); + unsigned sourceBits = sourceType.getBitWidth(); if (targetBits == sourceBits) rewriter.replaceOp(indexCastOp, transformed.in()); @@ -3462,14 +3472,18 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern { // Copy the buffer pointer from the old descriptor to the new one. Value extracted = sourceMemRef.allocatedPtr(rewriter, loc); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + viewMemRefType.getMemorySpace()), extracted); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); // Copy the buffer pointer from the old descriptor to the new one. 
extracted = sourceMemRef.alignedPtr(rewriter, loc); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + viewMemRefType.getMemorySpace()), extracted); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); @@ -3662,7 +3676,9 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc); auto srcMemRefType = viewOp.source().getType().cast(); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + srcMemRefType.getMemorySpace()), allocatedPtr); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); @@ -3671,7 +3687,9 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { alignedPtr = rewriter.create(loc, alignedPtr.getType(), alignedPtr, adaptor.byte_shift()); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + srcMemRefType.getMemorySpace()), alignedPtr); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); @@ -4064,7 +4082,8 @@ Value LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, Value operand, auto indexType = IndexType::get(context); // Alloca with proper alignment. We do not expect optimizations of this // alloca op and so we omit allocating at the entry block. - auto ptrType = operand.getType().cast().getPointerTo(); + auto ptrType = + LLVM::LLVMPointerType::get(operand.getType().cast()); Value one = builder.create(loc, int64Ty, IntegerAttr::get(indexType, 1)); Value allocated = diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index a982b90e0e93b..bcc91e304e72d 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -193,7 +193,7 @@ static LogicalResult getBasePtr(ConversionPatternRewriter &rewriter, Value base; if (failed(getBase(rewriter, loc, memref, memRefType, base))) return failure(); - auto pType = type.template cast().getPointerTo(); + auto pType = LLVM::LLVMPointerType::get(type.template cast()); base = rewriter.create(loc, pType, base); ptr = rewriter.create(loc, pType, base); return success(); @@ -1100,14 +1100,14 @@ class VectorTypeCastOpConversion return failure(); auto llvmSourceDescriptorTy = - operands[0].getType().dyn_cast(); - if (!llvmSourceDescriptorTy || !llvmSourceDescriptorTy.isStructTy()) + operands[0].getType().dyn_cast(); + if (!llvmSourceDescriptorTy) return failure(); MemRefDescriptor sourceMemRef(operands[0]); auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType) - .dyn_cast_or_null(); - if (!llvmTargetDescriptorTy || !llvmTargetDescriptorTy.isStructTy()) + .dyn_cast_or_null(); + if (!llvmTargetDescriptorTy) return failure(); // Only contiguous source buffers supported atm. @@ -1231,15 +1231,15 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // TODO: support alignment when possible. 
Value dataPtr = this->getStridedElementPtr( loc, memRefType, adaptor.source(), adaptor.indices(), rewriter); - auto vecTy = - toLLVMTy(xferOp.getVectorType()).template cast(); + auto vecTy = toLLVMTy(xferOp.getVectorType()) + .template cast(); Value vectorDataPtr; if (memRefType.getMemorySpace() == 0) - vectorDataPtr = - rewriter.create(loc, vecTy.getPointerTo(), dataPtr); + vectorDataPtr = rewriter.create( + loc, LLVM::LLVMPointerType::get(vecTy), dataPtr); else vectorDataPtr = rewriter.create( - loc, vecTy.getPointerTo(), dataPtr); + loc, LLVM::LLVMPointerType::get(vecTy), dataPtr); if (!xferOp.isMaskedDim(0)) return replaceTransferOpWithLoadOrStore(rewriter, @@ -1253,7 +1253,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // // TODO: when the leaf transfer rank is k > 1, we need the last `k` // dimensions here. - unsigned vecWidth = vecTy.getVectorNumElements(); + unsigned vecWidth = vecTy.getNumElements(); unsigned lastIndex = llvm::size(xferOp.indices()) - 1; Value off = xferOp.indices()[lastIndex]; Value dim = rewriter.create(loc, xferOp.source(), lastIndex); diff --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp index 973b116ef498c..1335f33e10aa7 100644 --- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp +++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp @@ -78,9 +78,9 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { auto toLLVMTy = [&](Type t) { return this->getTypeConverter()->convertType(t); }; - LLVM::LLVMType vecTy = - toLLVMTy(xferOp.getVectorType()).template cast(); - unsigned vecWidth = vecTy.getVectorNumElements(); + auto vecTy = toLLVMTy(xferOp.getVectorType()) + .template cast(); + unsigned vecWidth = vecTy.getNumElements(); Location loc = xferOp->getLoc(); // The backend result vector scalarization have trouble scalarize diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 7b1300da1783f..2bdbb877ec84c 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -105,9 +105,10 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { auto argType = type.dyn_cast(); if (!argType) return parser.emitError(trailingTypeLoc, "expected LLVM IR dialect type"); - if (argType.isVectorTy()) - resultType = - LLVMType::getVectorTy(resultType, argType.getVectorNumElements()); + if (auto vecArgType = argType.dyn_cast()) + resultType = LLVMType::getVectorTy(resultType, vecArgType.getNumElements()); + assert(!argType.isa() && + "unhandled scalable vector"); result.addTypes({resultType}); return success(); @@ -118,7 +119,7 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { //===----------------------------------------------------------------------===// static void printAllocaOp(OpAsmPrinter &p, AllocaOp &op) { - auto elemTy = op.getType().cast().getPointerElementTy(); + auto elemTy = op.getType().cast().getElementType(); auto funcTy = FunctionType::get(op.getContext(), {op.arraySize().getType()}, {op.getType()}); @@ -363,14 +364,11 @@ static void printLoadOp(OpAsmPrinter &p, LoadOp &op) { // the resulting type wrapped in MLIR, or nullptr on error. 
static Type getLoadStoreElementType(OpAsmParser &parser, Type type, llvm::SMLoc trailingTypeLoc) { - auto llvmTy = type.dyn_cast(); + auto llvmTy = type.dyn_cast(); if (!llvmTy) - return parser.emitError(trailingTypeLoc, "expected LLVM IR dialect type"), - nullptr; - if (!llvmTy.isPointerTy()) return parser.emitError(trailingTypeLoc, "expected LLVM pointer type"), nullptr; - return llvmTy.getPointerElementTy(); + return llvmTy.getElementType(); } // ::= `llvm.load` `volatile` ssa-use attribute-dict? `:` type @@ -569,7 +567,7 @@ static ParseResult parseInvokeOp(OpAsmParser &parser, OperationState &result) { auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, /*isVarArg=*/false); - auto wrappedFuncType = llvmFuncType.getPointerTo(); + auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = llvm::makeArrayRef(operands).drop_front(); @@ -613,7 +611,7 @@ static LogicalResult verify(LandingpadOp op) { for (unsigned idx = 0, ie = op.getNumOperands(); idx < ie; idx++) { value = op.getOperand(idx); - bool isFilter = value.getType().cast().isArrayTy(); + bool isFilter = value.getType().isa(); if (isFilter) { // FIXME: Verify filter clauses when arrays are appropriately handled } else { @@ -646,7 +644,7 @@ static void printLandingpadOp(OpAsmPrinter &p, LandingpadOp &op) { for (auto value : op.getOperands()) { // Similar to llvm - if clause is an array type then it is filter // clause else catch clause - bool isArrayTy = value.getType().cast().isArrayTy(); + bool isArrayTy = value.getType().isa(); p << '(' << (isArrayTy ? "filter " : "catch ") << value << " : " << value.getType() << ") "; } @@ -728,37 +726,37 @@ static LogicalResult verify(CallOp &op) { fnType = fn.getType(); } - if (!fnType.isFunctionTy()) + + LLVMFunctionType funcType = fnType.dyn_cast(); + if (!funcType) return op.emitOpError("callee does not have a functional type: ") << fnType; // Verify that the operand and result types match the callee. 
- if (!fnType.isFunctionVarArg() && - fnType.getFunctionNumParams() != (op.getNumOperands() - isIndirect)) + if (!funcType.isVarArg() && + funcType.getNumParams() != (op.getNumOperands() - isIndirect)) return op.emitOpError() << "incorrect number of operands (" << (op.getNumOperands() - isIndirect) - << ") for callee (expecting: " << fnType.getFunctionNumParams() - << ")"; + << ") for callee (expecting: " << funcType.getNumParams() << ")"; - if (fnType.getFunctionNumParams() > (op.getNumOperands() - isIndirect)) + if (funcType.getNumParams() > (op.getNumOperands() - isIndirect)) return op.emitOpError() << "incorrect number of operands (" << (op.getNumOperands() - isIndirect) << ") for varargs callee (expecting at least: " - << fnType.getFunctionNumParams() << ")"; + << funcType.getNumParams() << ")"; - for (unsigned i = 0, e = fnType.getFunctionNumParams(); i != e; ++i) - if (op.getOperand(i + isIndirect).getType() != - fnType.getFunctionParamType(i)) + for (unsigned i = 0, e = funcType.getNumParams(); i != e; ++i) + if (op.getOperand(i + isIndirect).getType() != funcType.getParamType(i)) return op.emitOpError() << "operand type mismatch for operand " << i << ": " << op.getOperand(i + isIndirect).getType() - << " != " << fnType.getFunctionParamType(i); + << " != " << funcType.getParamType(i); if (op.getNumResults() && - op.getResult(0).getType() != fnType.getFunctionResultType()) + op.getResult(0).getType() != funcType.getReturnType()) return op.emitOpError() << "result type mismatch: " << op.getResult(0).getType() - << " != " << fnType.getFunctionResultType(); + << " != " << funcType.getReturnType(); return success(); } @@ -848,7 +846,7 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { } auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, /*isVarArg=*/false); - auto wrappedFuncType = llvmFuncType.getPointerTo(); + auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = ArrayRef(operands).drop_front(); @@ -875,8 +873,8 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { void LLVM::ExtractElementOp::build(OpBuilder &b, OperationState &result, Value vector, Value position, ArrayRef attrs) { - auto wrappedVectorType = vector.getType().cast(); - auto llvmType = wrappedVectorType.getVectorElementType(); + auto vectorType = vector.getType().cast(); + auto llvmType = vectorType.getElementType(); build(b, result, llvmType, vector, position); result.addAttributes(attrs); } @@ -903,11 +901,11 @@ static ParseResult parseExtractElementOp(OpAsmParser &parser, parser.resolveOperand(vector, type, result.operands) || parser.resolveOperand(position, positionType, result.operands)) return failure(); - auto wrappedVectorType = type.dyn_cast(); - if (!wrappedVectorType || !wrappedVectorType.isVectorTy()) + auto vectorType = type.dyn_cast(); + if (!vectorType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - result.addTypes(wrappedVectorType.getVectorElementType()); + result.addTypes(vectorType.getElementType()); return success(); } @@ -930,8 +928,8 @@ static LLVM::LLVMType getInsertExtractValueElementType(OpAsmParser &parser, ArrayAttr positionAttr, llvm::SMLoc attributeLoc, llvm::SMLoc typeLoc) { - auto wrappedContainerType = containerType.dyn_cast(); - if (!wrappedContainerType) + auto llvmType = containerType.dyn_cast(); + if (!llvmType) return parser.emitError(typeLoc, "expected LLVM IR Dialect type"), nullptr; // Infer the element type from the structure 
type: iteratively step inside the @@ -945,26 +943,24 @@ static LLVM::LLVMType getInsertExtractValueElementType(OpAsmParser &parser, "expected an array of integer literals"), nullptr; int position = positionElementAttr.getInt(); - if (wrappedContainerType.isArrayTy()) { - if (position < 0 || static_cast(position) >= - wrappedContainerType.getArrayNumElements()) + if (auto arrayType = llvmType.dyn_cast()) { + if (position < 0 || + static_cast(position) >= arrayType.getNumElements()) return parser.emitError(attributeLoc, "position out of bounds"), nullptr; - wrappedContainerType = wrappedContainerType.getArrayElementType(); - } else if (wrappedContainerType.isStructTy()) { - if (position < 0 || static_cast(position) >= - wrappedContainerType.getStructNumElements()) + llvmType = arrayType.getElementType(); + } else if (auto structType = llvmType.dyn_cast()) { + if (position < 0 || + static_cast(position) >= structType.getBody().size()) return parser.emitError(attributeLoc, "position out of bounds"), nullptr; - wrappedContainerType = - wrappedContainerType.getStructElementType(position); + llvmType = structType.getBody()[position]; } else { - return parser.emitError(typeLoc, - "expected wrapped LLVM IR structure/array type"), + return parser.emitError(typeLoc, "expected LLVM IR structure/array type"), nullptr; } } - return wrappedContainerType; + return llvmType; } // ::= `llvm.extractvalue` ssa-use @@ -1021,11 +1017,11 @@ static ParseResult parseInsertElementOp(OpAsmParser &parser, parser.parseColonType(vectorType)) return failure(); - auto wrappedVectorType = vectorType.dyn_cast(); - if (!wrappedVectorType || !wrappedVectorType.isVectorTy()) + auto llvmVectorType = vectorType.dyn_cast(); + if (!llvmVectorType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - auto valueType = wrappedVectorType.getVectorElementType(); + Type valueType = llvmVectorType.getElementType(); if (!valueType) return failure(); @@ -1145,12 +1141,14 @@ static LogicalResult verify(AddressOfOp op) { return op.emitOpError( "must reference a global defined by 'llvm.mlir.global' or 'llvm.func'"); - if (global && global.getType().getPointerTo(global.addr_space()) != - op.getResult().getType()) + if (global && + LLVM::LLVMPointerType::get(global.getType(), global.addr_space()) != + op.getResult().getType()) return op.emitOpError( "the type must be a pointer to the type of the referenced global"); - if (function && function.getType().getPointerTo() != op.getResult().getType()) + if (function && LLVM::LLVMPointerType::get(function.getType()) != + op.getResult().getType()) return op.emitOpError( "the type must be a pointer to the type of the referenced function"); @@ -1276,11 +1274,11 @@ static LogicalResult verifyCast(DialectCastOp op, LLVMType llvmType, if (vectorType.getRank() != 1) return op->emitOpError("only 1-d vector is allowed"); - auto llvmVector = llvmType.dyn_cast(); - if (llvmVector.isa()) + auto llvmVector = llvmType.dyn_cast(); + if (!llvmVector) return op->emitOpError("only fixed-sized vector is allowed"); - if (vectorType.getDimSize(0) != llvmVector.getVectorNumElements()) + if (vectorType.getDimSize(0) != llvmVector.getNumElements()) return op->emitOpError( "invalid cast between vectors with mismatching sizes"); @@ -1375,7 +1373,10 @@ static LogicalResult verifyCast(DialectCastOp op, LLVMType llvmType, "be an index-compatible integer"); auto ptrType = structType.getBody()[1].dyn_cast(); - if (!ptrType || !ptrType.getPointerElementTy().isIntegerTy(8)) + auto ptrElementType = 
+ ptrType ? ptrType.getElementType().dyn_cast() + : nullptr; + if (!ptrElementType || ptrElementType.getBitWidth() != 8) return op->emitOpError("expected second element of a memref descriptor " "to be an !llvm.ptr"); @@ -1503,9 +1504,11 @@ static LogicalResult verify(GlobalOp op) { return op.emitOpError("must appear at the module level"); if (auto strAttr = op.getValueOrNull().dyn_cast_or_null()) { - auto type = op.getType(); - if (!type.isArrayTy() || !type.getArrayElementType().isIntegerTy(8) || - type.getArrayNumElements() != strAttr.getValue().size()) + auto type = op.getType().dyn_cast(); + LLVMIntegerType elementType = + type ? type.getElementType().dyn_cast() : nullptr; + if (!elementType || elementType.getBitWidth() != 8 || + type.getNumElements() != strAttr.getValue().size()) return op.emitOpError( "requires an i8 array type of the length equal to that of the string " "attribute"); @@ -1534,9 +1537,9 @@ static LogicalResult verify(GlobalOp op) { void LLVM::ShuffleVectorOp::build(OpBuilder &b, OperationState &result, Value v1, Value v2, ArrayAttr mask, ArrayRef attrs) { - auto wrappedContainerType1 = v1.getType().cast(); - auto vType = LLVMType::getVectorTy( - wrappedContainerType1.getVectorElementType(), mask.size()); + auto containerType = v1.getType().cast(); + auto vType = + LLVMType::getVectorTy(containerType.getElementType(), mask.size()); build(b, result, vType, v1, v2, mask); result.addAttributes(attrs); } @@ -1566,12 +1569,12 @@ static ParseResult parseShuffleVectorOp(OpAsmParser &parser, parser.resolveOperand(v1, typeV1, result.operands) || parser.resolveOperand(v2, typeV2, result.operands)) return failure(); - auto wrappedContainerType1 = typeV1.dyn_cast(); - if (!wrappedContainerType1 || !wrappedContainerType1.isVectorTy()) + auto containerType = typeV1.dyn_cast(); + if (!containerType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - auto vType = LLVMType::getVectorTy( - wrappedContainerType1.getVectorElementType(), maskAttr.size()); + auto vType = + LLVMType::getVectorTy(containerType.getElementType(), maskAttr.size()); result.addTypes(vType); return success(); } @@ -1588,9 +1591,9 @@ Block *LLVMFuncOp::addEntryBlock() { auto *entry = new Block; push_back(entry); - LLVMType type = getType(); - for (unsigned i = 0, e = type.getFunctionNumParams(); i < e; ++i) - entry->addArgument(type.getFunctionParamType(i)); + LLVMFunctionType type = getType(); + for (unsigned i = 0, e = type.getNumParams(); i < e; ++i) + entry->addArgument(type.getParamType(i)); return entry; } @@ -1608,7 +1611,7 @@ void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, if (argAttrs.empty()) return; - unsigned numInputs = type.getFunctionNumParams(); + unsigned numInputs = type.cast().getNumParams(); assert(numInputs == argAttrs.size() && "expected as many argument attribute lists as arguments"); SmallString<8> argAttrName; @@ -1711,15 +1714,15 @@ static void printLLVMFuncOp(OpAsmPrinter &p, LLVMFuncOp op) { p << stringifyLinkage(op.linkage()) << ' '; p.printSymbolName(op.getName()); - LLVMType fnType = op.getType(); + LLVMFunctionType fnType = op.getType(); SmallVector argTypes; SmallVector resTypes; - argTypes.reserve(fnType.getFunctionNumParams()); - for (unsigned i = 0, e = fnType.getFunctionNumParams(); i < e; ++i) - argTypes.push_back(fnType.getFunctionParamType(i)); + argTypes.reserve(fnType.getNumParams()); + for (unsigned i = 0, e = fnType.getNumParams(); i < e; ++i) + argTypes.push_back(fnType.getParamType(i)); - LLVMType returnType = 
fnType.getFunctionResultType(); - if (!returnType.isVoidTy()) + LLVMType returnType = fnType.getReturnType(); + if (!returnType.isa()) resTypes.push_back(returnType); impl::printFunctionSignature(p, op, argTypes, op.isVarArg(), resTypes); @@ -1737,8 +1740,8 @@ static void printLLVMFuncOp(OpAsmPrinter &p, LLVMFuncOp op) { // attribute is present. This can check for preconditions of the // getNumArguments hook not failing. LogicalResult LLVMFuncOp::verifyType() { - auto llvmType = getTypeAttr().getValue().dyn_cast_or_null(); - if (!llvmType || !llvmType.isFunctionTy()) + auto llvmType = getTypeAttr().getValue().dyn_cast_or_null(); + if (!llvmType) return emitOpError("requires '" + getTypeAttrName() + "' attribute of wrapped LLVM function type"); @@ -1747,9 +1750,7 @@ LogicalResult LLVMFuncOp::verifyType() { // Hook for OpTrait::FunctionLike, returns the number of function arguments. // Depends on the type attribute being correct as checked by verifyType -unsigned LLVMFuncOp::getNumFuncArguments() { - return getType().getFunctionNumParams(); -} +unsigned LLVMFuncOp::getNumFuncArguments() { return getType().getNumParams(); } // Hook for OpTrait::FunctionLike, returns the number of function results. // Depends on the type attribute being correct as checked by verifyType @@ -1759,7 +1760,7 @@ unsigned LLVMFuncOp::getNumFuncResults() { // If we modeled a void return as one result, then it would be possible to // attach an MLIR result attribute to it, and it isn't clear what semantics we // would assign to that. - if (getType().getFunctionResultType().isVoidTy()) + if (getType().getReturnType().isa()) return 0; return 1; } @@ -1788,7 +1789,7 @@ static LogicalResult verify(LLVMFuncOp op) { if (op.isVarArg()) return op.emitOpError("only external functions can be variadic"); - unsigned numArguments = op.getType().getFunctionNumParams(); + unsigned numArguments = op.getType().getNumParams(); Block &entryBlock = op.front(); for (unsigned i = 0; i < numArguments; ++i) { Type argType = entryBlock.getArgument(i).getType(); @@ -1796,7 +1797,7 @@ static LogicalResult verify(LLVMFuncOp op) { if (!argLLVMType) return op.emitOpError("entry block argument #") << i << " is not of LLVM type"; - if (op.getType().getFunctionParamType(i) != argLLVMType) + if (op.getType().getParamType(i) != argLLVMType) return op.emitOpError("the type of entry block argument #") << i << " does not match the function signature"; } @@ -1896,7 +1897,8 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, parseAtomicOrdering(parser, result, "ordering") || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || - parser.resolveOperand(ptr, type.getPointerTo(), result.operands) || + parser.resolveOperand(ptr, LLVM::LLVMPointerType::get(type), + result.operands) || parser.resolveOperand(val, type, result.operands)) return failure(); @@ -1905,9 +1907,9 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, } static LogicalResult verify(AtomicRMWOp op) { - auto ptrType = op.ptr().getType().cast(); + auto ptrType = op.ptr().getType().cast(); auto valType = op.val().getType().cast(); - if (valType != ptrType.getPointerElementTy()) + if (valType != ptrType.getElementType()) return op.emitOpError("expected LLVM IR element type for operand #0 to " "match type for operand #1"); auto resType = op.res().getType().cast(); @@ -1915,17 +1917,21 @@ static LogicalResult verify(AtomicRMWOp op) { return op.emitOpError( "expected LLVM IR result type to match type for operand #1"); if (op.bin_op() == AtomicBinOp::fadd || 
op.bin_op() == AtomicBinOp::fsub) { - if (!valType.isFloatingPointTy()) + if (!mlir::LLVM::isCompatibleFloatingPointType(valType)) return op.emitOpError("expected LLVM IR floating point type"); } else if (op.bin_op() == AtomicBinOp::xchg) { - if (!valType.isIntegerTy(8) && !valType.isIntegerTy(16) && - !valType.isIntegerTy(32) && !valType.isIntegerTy(64) && - !valType.isBFloatTy() && !valType.isHalfTy() && !valType.isFloatTy() && - !valType.isDoubleTy()) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (intBitWidth != 8 && intBitWidth != 16 && intBitWidth != 32 && + intBitWidth != 64 && !valType.isa() && + !valType.isa() && !valType.isa() && + !valType.isa()) return op.emitOpError("unexpected LLVM IR type for 'xchg' bin_op"); } else { - if (!valType.isIntegerTy(8) && !valType.isIntegerTy(16) && - !valType.isIntegerTy(32) && !valType.isIntegerTy(64)) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (intBitWidth != 8 && intBitWidth != 16 && intBitWidth != 32 && + intBitWidth != 64) return op.emitOpError("expected LLVM IR integer type"); } return success(); @@ -1958,7 +1964,8 @@ static ParseResult parseAtomicCmpXchgOp(OpAsmParser &parser, parseAtomicOrdering(parser, result, "failure_ordering") || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || - parser.resolveOperand(ptr, type.getPointerTo(), result.operands) || + parser.resolveOperand(ptr, LLVM::LLVMPointerType::get(type), + result.operands) || parser.resolveOperand(cmp, type, result.operands) || parser.resolveOperand(val, type, result.operands)) return failure(); @@ -1971,18 +1978,20 @@ static ParseResult parseAtomicCmpXchgOp(OpAsmParser &parser, } static LogicalResult verify(AtomicCmpXchgOp op) { - auto ptrType = op.ptr().getType().cast(); - if (!ptrType.isPointerTy()) + auto ptrType = op.ptr().getType().cast(); + if (!ptrType) return op.emitOpError("expected LLVM IR pointer type for operand #0"); auto cmpType = op.cmp().getType().cast(); auto valType = op.val().getType().cast(); - if (cmpType != ptrType.getPointerElementTy() || cmpType != valType) + if (cmpType != ptrType.getElementType() || cmpType != valType) return op.emitOpError("expected LLVM IR element type for operand #0 to " "match type for all other operands"); - if (!valType.isPointerTy() && !valType.isIntegerTy(8) && - !valType.isIntegerTy(16) && !valType.isIntegerTy(32) && - !valType.isIntegerTy(64) && !valType.isBFloatTy() && - !valType.isHalfTy() && !valType.isFloatTy() && !valType.isDoubleTy()) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (!valType.isa() && intBitWidth != 8 && + intBitWidth != 16 && intBitWidth != 32 && intBitWidth != 64 && + !valType.isa() && !valType.isa() && + !valType.isa() && !valType.isa()) return op.emitOpError("unexpected LLVM IR type"); if (op.success_ordering() < AtomicOrdering::monotonic || op.failure_ordering() < AtomicOrdering::monotonic) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index a89287b764e5d..0616efb7ef3f9 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -36,129 +36,6 @@ LLVMDialect &LLVMType::getDialect() { return static_cast(Type::getDialect()); } -//----------------------------------------------------------------------------// -// Misc type utilities. 
- -llvm::TypeSize LLVMType::getPrimitiveSizeInBits() { - return llvm::TypeSwitch(*this) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(16); }) - .Case([](LLVMType) { return llvm::TypeSize::Fixed(32); }) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(64); }) - .Case([](LLVMIntegerType intTy) { - return llvm::TypeSize::Fixed(intTy.getBitWidth()); - }) - .Case([](LLVMType) { return llvm::TypeSize::Fixed(80); }) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(128); }) - .Case([](LLVMVectorType t) { - llvm::TypeSize elementSize = - t.getElementType().getPrimitiveSizeInBits(); - llvm::ElementCount elementCount = t.getElementCount(); - assert(!elementSize.isScalable() && - "vector type should have fixed-width elements"); - return llvm::TypeSize(elementSize.getFixedSize() * - elementCount.getKnownMinValue(), - elementCount.isScalable()); - }) - .Default([](LLVMType ty) { - assert((ty.isa()) && - "unexpected missing support for primitive type"); - return llvm::TypeSize::Fixed(0); - }); -} - -//----------------------------------------------------------------------------// -// Integer type utilities. - -bool LLVMType::isIntegerTy(unsigned bitwidth) { - if (auto intType = dyn_cast()) - return intType.getBitWidth() == bitwidth; - return false; -} -unsigned LLVMType::getIntegerBitWidth() { - return cast().getBitWidth(); -} - -LLVMType LLVMType::getArrayElementType() { - return cast().getElementType(); -} - -//----------------------------------------------------------------------------// -// Array type utilities. - -unsigned LLVMType::getArrayNumElements() { - return cast().getNumElements(); -} - -bool LLVMType::isArrayTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Vector type utilities. - -LLVMType LLVMType::getVectorElementType() { - return cast().getElementType(); -} - -unsigned LLVMType::getVectorNumElements() { - return cast().getNumElements(); -} -llvm::ElementCount LLVMType::getVectorElementCount() { - return cast().getElementCount(); -} - -bool LLVMType::isVectorTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Function type utilities. - -LLVMType LLVMType::getFunctionParamType(unsigned argIdx) { - return cast().getParamType(argIdx); -} - -unsigned LLVMType::getFunctionNumParams() { - return cast().getNumParams(); -} - -LLVMType LLVMType::getFunctionResultType() { - return cast().getReturnType(); -} - -bool LLVMType::isFunctionTy() { return isa(); } - -bool LLVMType::isFunctionVarArg() { - return cast().isVarArg(); -} - -//----------------------------------------------------------------------------// -// Pointer type utilities. - -LLVMType LLVMType::getPointerTo(unsigned addrSpace) { - return LLVMPointerType::get(*this, addrSpace); -} - -LLVMType LLVMType::getPointerElementTy() { - return cast().getElementType(); -} - -bool LLVMType::isPointerTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Struct type utilities. - -LLVMType LLVMType::getStructElementType(unsigned i) { - return cast().getBody()[i]; -} - -unsigned LLVMType::getStructNumElements() { - return cast().getBody().size(); -} - -bool LLVMType::isStructTy() { return isa(); } - //----------------------------------------------------------------------------// // Utilities used to generate floating point types. 
@@ -193,6 +70,10 @@ LLVMType LLVMType::getIntNTy(MLIRContext *context, unsigned numBits) { return LLVMIntegerType::get(context, numBits); } +LLVMType LLVMType::getInt8PtrTy(MLIRContext *context) { + return LLVMPointerType::get(LLVMIntegerType::get(context, 8)); +} + //----------------------------------------------------------------------------// // Utilities used to generate other miscellaneous types. @@ -221,8 +102,6 @@ LLVMType LLVMType::getVoidTy(MLIRContext *context) { return LLVMVoidType::get(context); } -bool LLVMType::isVoidTy() { return isa(); } - //----------------------------------------------------------------------------// // Creation and setting of LLVM's identified struct types @@ -470,7 +349,7 @@ LLVMStructType::verifyConstructionInvariants(Location loc, bool LLVMVectorType::isValidElementType(LLVMType type) { return type.isa() || - type.isFloatingPointTy(); + mlir::LLVM::isCompatibleFloatingPointType(type); } /// Support type casting functionality. @@ -536,3 +415,42 @@ LLVMScalableVectorType::getChecked(Location loc, LLVMType elementType, unsigned LLVMScalableVectorType::getMinNumElements() { return getImpl()->numElements; } + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +llvm::TypeSize mlir::LLVM::getPrimitiveTypeSizeInBits(Type type) { + assert(isCompatibleType(type) && + "expected a type compatible with the LLVM dialect"); + + return llvm::TypeSwitch(type) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(16); }) + .Case([](LLVMType) { return llvm::TypeSize::Fixed(32); }) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(64); }) + .Case([](LLVMIntegerType intTy) { + return llvm::TypeSize::Fixed(intTy.getBitWidth()); + }) + .Case([](LLVMType) { return llvm::TypeSize::Fixed(80); }) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(128); }) + .Case([](LLVMVectorType t) { + llvm::TypeSize elementSize = + getPrimitiveTypeSizeInBits(t.getElementType()); + llvm::ElementCount elementCount = t.getElementCount(); + assert(!elementSize.isScalable() && + "vector type should have fixed-width elements"); + return llvm::TypeSize(elementSize.getFixedSize() * + elementCount.getKnownMinValue(), + elementCount.isScalable()); + }) + .Default([](Type ty) { + assert((ty.isa()) && + "unexpected missing support for primitive type"); + return llvm::TypeSize::Fixed(0); + }); +} diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 707ff7c1b089b..c202075fa2066 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -57,8 +57,9 @@ static ParseResult parseNVVMShflSyncBflyOp(OpAsmParser &parser, for (auto &attr : result.attributes) { if (attr.first != "return_value_and_is_valid") continue; - if (type.isStructTy() && type.getStructNumElements() > 0) - type = type.getStructElementType(0); + auto structType = type.dyn_cast(); + if (structType && !structType.getBody().empty()) + type = structType.getBody()[0]; break; } diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp index a323e68170c1b..bfdae2b4588d5 100644 --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -196,19 +196,30 @@ template Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction); template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if 
(!mainFunction.getType().getFunctionResultType().isIntegerTy(32)) + auto resultType = mainFunction.getType() + .cast() + .getReturnType() + .dyn_cast(); + if (!resultType || resultType.getBitWidth() != 32) return make_string_error("only single llvm.i32 function result supported"); return Error::success(); } template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if (!mainFunction.getType().getFunctionResultType().isIntegerTy(64)) + auto resultType = mainFunction.getType() + .cast() + .getReturnType() + .dyn_cast(); + if (!resultType || resultType.getBitWidth() != 64) return make_string_error("only single llvm.i64 function result supported"); return Error::success(); } template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if (!mainFunction.getType().getFunctionResultType().isFloatTy()) + if (!mainFunction.getType() + .cast() + .getReturnType() + .isa()) return make_string_error("only single llvm.f32 function result supported"); return Error::success(); } @@ -220,7 +231,7 @@ Error compileAndExecuteSingleReturnFunction(Options &options, ModuleOp module, if (!mainFunction || mainFunction.isExternal()) return make_string_error("entry point not found"); - if (mainFunction.getType().getFunctionNumParams() != 0) + if (mainFunction.getType().cast().getNumParams() != 0) return make_string_error("function inputs not supported"); if (Error error = checkCompatibleReturnType(mainFunction)) diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 7f89a41de5db7..9786751ef4b0d 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -172,57 +172,57 @@ Type Importer::getStdTypeForAttr(LLVMType type) { if (!type) return nullptr; - if (type.isIntegerTy()) - return b.getIntegerType(type.getIntegerBitWidth()); + if (auto intType = type.dyn_cast()) + return b.getIntegerType(intType.getBitWidth()); - if (type.isFloatTy()) + if (type.isa()) return b.getF32Type(); - if (type.isDoubleTy()) + if (type.isa()) return b.getF64Type(); // LLVM vectors can only contain scalars. - if (type.isVectorTy()) { - auto numElements = type.getVectorElementCount(); + if (auto vectorType = type.dyn_cast()) { + auto numElements = vectorType.getElementCount(); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } - Type elementType = getStdTypeForAttr(type.getVectorElementType()); + Type elementType = getStdTypeForAttr(vectorType.getElementType()); if (!elementType) return nullptr; return VectorType::get(numElements.getKnownMinValue(), elementType); } // LLVM arrays can contain other arrays or vectors. - if (type.isArrayTy()) { + if (auto arrayType = type.dyn_cast()) { // Recover the nested array shape. SmallVector shape; - shape.push_back(type.getArrayNumElements()); - while (type.getArrayElementType().isArrayTy()) { - type = type.getArrayElementType(); - shape.push_back(type.getArrayNumElements()); + shape.push_back(arrayType.getNumElements()); + while (arrayType.getElementType().isa()) { + arrayType = arrayType.getElementType().cast(); + shape.push_back(arrayType.getNumElements()); } // If the innermost type is a vector, use the multi-dimensional vector as // attribute type. 
- if (type.getArrayElementType().isVectorTy()) { - LLVMType vectorType = type.getArrayElementType(); - auto numElements = vectorType.getVectorElementCount(); + if (auto vectorType = + arrayType.getElementType().dyn_cast()) { + auto numElements = vectorType.getElementCount(); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } shape.push_back(numElements.getKnownMinValue()); - Type elementType = getStdTypeForAttr(vectorType.getVectorElementType()); + Type elementType = getStdTypeForAttr(vectorType.getElementType()); if (!elementType) return nullptr; return VectorType::get(shape, elementType); } // Otherwise use a tensor. - Type elementType = getStdTypeForAttr(type.getArrayElementType()); + Type elementType = getStdTypeForAttr(arrayType.getElementType()); if (!elementType) return nullptr; return RankedTensorType::get(shape, elementType); @@ -261,7 +261,7 @@ Attribute Importer::getConstantAsAttr(llvm::Constant *value) { if (!attrType) return nullptr; - if (type.isIntegerTy()) { + if (type.isa()) { SmallVector values; values.reserve(cd->getNumElements()); for (unsigned i = 0, e = cd->getNumElements(); i < e; ++i) @@ -269,7 +269,7 @@ Attribute Importer::getConstantAsAttr(llvm::Constant *value) { return DenseElementsAttr::get(attrType, values); } - if (type.isFloatTy() || type.isDoubleTy()) { + if (type.isa() || type.isa()) { SmallVector values; values.reserve(cd->getNumElements()); for (unsigned i = 0, e = cd->getNumElements(); i < e; ++i) @@ -777,7 +777,8 @@ LogicalResult Importer::processFunction(llvm::Function *f) { instMap.clear(); unknownInstMap.clear(); - LLVMType functionType = processType(f->getFunctionType()); + auto functionType = + processType(f->getFunctionType()).dyn_cast(); if (!functionType) return failure(); @@ -805,8 +806,8 @@ LogicalResult Importer::processFunction(llvm::Function *f) { // Add function arguments to the entry block. for (auto kv : llvm::enumerate(f->args())) - instMap[&kv.value()] = blockList[0]->addArgument( - functionType.getFunctionParamType(kv.index())); + instMap[&kv.value()] = + blockList[0]->addArgument(functionType.getParamType(kv.index())); for (auto bbs : llvm::zip(*f, blockList)) { if (failed(processBasicBlock(&std::get<0>(bbs), std::get<1>(bbs)))) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 8c650506e2d74..ae0745b0be28f 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -969,7 +969,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { // NB: Attribute already verified to be boolean, so check if we can indeed // attach the attribute to this argument, based on its type. auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy.isPointerTy()) + if (!argTy.isa()) return func.emitError( "llvm.noalias attribute attached to LLVM non-pointer argument"); if (attr.getValue()) @@ -981,7 +981,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { // NB: Attribute already verified to be int, so check if we can indeed // attach the attribute to this argument, based on its type. 
auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy.isPointerTy()) + if (!argTy.isa()) return func.emitError( "llvm.align attribute attached to LLVM non-pointer argument"); llvmArg.addAttrs( diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 9461ebbd9ede9..d02c252c0bf36 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -98,7 +98,7 @@ func @gep_non_function_type(%pos : !llvm.i64, %base : !llvm.ptr) { // ----- func @load_non_llvm_type(%foo : memref) { - // expected-error@+1 {{expected LLVM IR dialect type}} + // expected-error@+1 {{expected LLVM pointer type}} llvm.load %foo : memref } @@ -112,7 +112,7 @@ func @load_non_ptr_type(%foo : !llvm.float) { // ----- func @store_non_llvm_type(%foo : memref, %bar : !llvm.float) { - // expected-error@+1 {{expected LLVM IR dialect type}} + // expected-error@+1 {{expected LLVM pointer type}} llvm.store %bar, %foo : memref } @@ -267,7 +267,7 @@ func @insertvalue_array_out_of_bounds() { // ----- func @insertvalue_wrong_nesting() { - // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} + // expected-error@+1 {{expected LLVM IR structure/array type}} llvm.insertvalue %a, %b[0,0] : !llvm.struct<(i32)> } @@ -311,7 +311,7 @@ func @extractvalue_array_out_of_bounds() { // ----- func @extractvalue_wrong_nesting() { - // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} + // expected-error@+1 {{expected LLVM IR structure/array type}} llvm.extractvalue %b[0,0] : !llvm.struct<(i32)> } From 1c19804ebf4c97666a5c7de86ca7432c6b020205 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 22 Dec 2020 15:14:30 -0500 Subject: [PATCH 135/378] [OpenMP] Add OpenMP Documentation for Libomptarget environment variables Add support to the OpenMP web pages for environment variables supported by Libomptarget and their usage. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93723 --- openmp/docs/design/Runtimes.rst | 82 +++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 61491060ea047..39ed256c48569 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -16,6 +16,88 @@ the LLVM/OpenMP host runtime, aka. `libomp.so`, is available as a `pdf LLVM/OpenMP Target Host Runtime (``libomptarget``) -------------------------------------------------- +Environment Variables +^^^^^^^^^^^^^^^^^^^^^ + +``libomptarget`` uses environment variables to control different features of the +library at runtime. This allows the user to obtain useful runtime information as +well as enable or disable certain features. A full list of supported environment +variables is defined below. + + * ``LIBOMPTARGET_DEBUG=`` + * ``LIBOMPTARGET_PROFILE=`` + * ``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD=`` + * ``LIBOMPTARGET_INFO=`` + +LIBOMPTARGET_DEBUG +"""""""""""""""""" + +``LIBOMPTARGET_DEBUG`` controls whether or not debugging information will be +displayed. This feature is only availible if ``libomptarget`` was built with +``-DOMPTARGET_DEBUG``. The debugging output provided is intended for use by +``libomptarget`` developers. More user-friendly output is presented when using +``LIBOMPTARGET_INFO``. + +LIBOMPTARGET_PROFILE +"""""""""""""""""""" +``LIBOMPTARGET_PROFILE`` allows ``libomptarget`` to generate time profile output +similar to Clang's ``-ftime-trace`` option. 
This generates a JSON file based on
+`Chrome Tracing`_ that can be viewed with ``chrome://tracing`` or the
+`Speedscope App`_. Building this feature depends on the `LLVM Support Library`_
+for time trace output. Using this library is enabled by default when building
+with the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will
+be saved to the filename specified by the environment variable.
+
+.. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool
+
+.. _`Speedscope App`: https://www.speedscope.app/
+
+.. _`LLVM Support Library`: https://llvm.org/docs/SupportLibrary.html
+
+LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD
+"""""""""""""""""""""""""""""""""""""
+
+``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` sets the threshold size for which the
+``libomptarget`` memory manager will handle the allocation. Any allocations
+larger than this threshold will not use the memory manager and will be freed
+after the device kernel exits. The default threshold value is ``8KB``. If
+``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` is set to ``0`` the memory manager
+will be completely disabled.
+
+LIBOMPTARGET_INFO
+"""""""""""""""""
+
+``LIBOMPTARGET_INFO`` allows the user to request different types of runtime
+information from ``libomptarget``. ``LIBOMPTARGET_INFO`` uses a 32-bit field to
+enable or disable different types of information. This includes information
+about data mappings and kernel execution. It is recommended to build your
+application with debugging information enabled; this will include filenames and
+variable declarations in the information messages. OpenMP debugging information
+is enabled at any level of debugging, so a full debug runtime is not required.
+For minimal debugging information compile with ``-gline-tables-only``, or
+compile with ``-g`` for full debug information. A full list of flags supported
+by ``LIBOMPTARGET_INFO`` is given below.
+
+ * Print all data arguments upon entering an OpenMP device kernel: ``0x01``
+ * Indicate when a mapped address already exists in the device mapping table:
+   ``0x02``
+ * Dump the contents of the device pointer map at kernel exit: ``0x04``
+ * Print OpenMP kernel information from device plugins: ``0x10``
+
+Any combination of these flags can be used by setting the appropriate bits. For
+example, to print all data arguments upon entering a target region along with
+kernel information from the ``CUDA`` plugin, run the following ``bash`` command.
+
+.. code-block:: console
+
+   $ env LIBOMPTARGET_INFO=$((0x1 | 0x10)) ./your-application
+
+Or, to enable every flag, run with every bit set.
+
+.. code-block:: console
+
+   $ env LIBOMPTARGET_INFO=-1 ./your-application
+
 LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``)
 -------------------------------------------------------------------

From 75a3f326c3d874853031d8bedd1d00127c835103 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Tue, 22 Dec 2020 10:35:15 -0800
Subject: [PATCH 136/378] [IR] Add an ImplicitLocOpBuilder helper class for
 building IR with the same loc.

One common situation is to create a lot of IR at a well-known location,
e.g. when doing a big rewrite from one dialect to another where you're
expanding ops out into lots of other ops. For these sorts of situations,
it is annoying to pass the location into every create call.

As we discussed in a few threads on the forum, a way to help with this
is to produce a new sort of builder that holds a location and provides
it to each of the create<> calls automatically.
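As a rough sketch of the resulting usage pattern (the helper function, callee
name, and values below are illustrative placeholders rather than code from this
patch, and assume the usual MLIR and LLVM-dialect headers and namespaces):

  // Hypothetical helper: emit a call to `free` and a branch, all at `loc`,
  // without threading `loc` through every create<> call.
  void emitCleanup(Location loc, OpBuilder &b, Value mem, Block *dest) {
    ImplicitLocOpBuilder ib(loc, b);        // the location is captured once
    ib.create<LLVM::CallOp>(TypeRange(), ib.getSymbolRefAttr("free"),
                            ValueRange(mem));
    ib.create<BranchOp>(dest);              // same create<> API as OpBuilder
  }

The explicit-location overloads of create<> remain available on the class, so a
call site can still pass a different location when it needs one.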
This patch implements an ImplicitLocOpBuilder class that does this. We've had good experience with this in the CIRCT project, and it makes sense to upstream to MLIR. I picked a random pass to adopt it to show the impact, but I don't think there is any particular need to force adopt it in the codebase. Differential Revision: https://reviews.llvm.org/D93717 --- mlir/include/mlir/IR/ImplicitLocOpBuilder.h | 123 ++++++++++++++++++ .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 96 +++++++------- 2 files changed, 170 insertions(+), 49 deletions(-) create mode 100644 mlir/include/mlir/IR/ImplicitLocOpBuilder.h diff --git a/mlir/include/mlir/IR/ImplicitLocOpBuilder.h b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h new file mode 100644 index 0000000000000..2dc7c34f4e855 --- /dev/null +++ b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h @@ -0,0 +1,123 @@ +//===- ImplicitLocOpBuilder.h - Convenience OpBuilder -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper class to create ops with a modally set location. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_IR_IMPLICITLOCOPBUILDER_H +#define MLIR_IR_IMPLICITLOCOPBUILDER_H + +#include "mlir/IR/Builders.h" + +namespace mlir { + +/// ImplictLocOpBuilder maintains a 'current location', allowing use of the +/// create<> method without specifying the location. It is otherwise the same +/// as OpBuilder. +class ImplicitLocOpBuilder : public mlir::OpBuilder { +public: + /// Create an ImplicitLocOpBuilder using the insertion point and listener from + /// an existing OpBuilder. + ImplicitLocOpBuilder(Location loc, const OpBuilder &builder) + : OpBuilder(builder), curLoc(loc) {} + + /// OpBuilder has a bunch of convenience constructors - we support them all + /// with the additional Location. + template + ImplicitLocOpBuilder(Location loc, T &&operand, Listener *listener = nullptr) + : OpBuilder(std::forward(operand), listener), curLoc(loc) {} + + ImplicitLocOpBuilder(Location loc, Block *block, Block::iterator insertPoint, + Listener *listener = nullptr) + : OpBuilder(block, insertPoint, listener), curLoc(loc) {} + + /// Create a builder and set the insertion point to before the first operation + /// in the block but still inside the block. + static ImplicitLocOpBuilder atBlockBegin(Location loc, Block *block, + Listener *listener = nullptr) { + return ImplicitLocOpBuilder(loc, block, block->begin(), listener); + } + + /// Create a builder and set the insertion point to after the last operation + /// in the block but still inside the block. + static ImplicitLocOpBuilder atBlockEnd(Location loc, Block *block, + Listener *listener = nullptr) { + return ImplicitLocOpBuilder(loc, block, block->end(), listener); + } + + /// Create a builder and set the insertion point to before the block + /// terminator. + static ImplicitLocOpBuilder atBlockTerminator(Location loc, Block *block, + Listener *listener = nullptr) { + auto *terminator = block->getTerminator(); + assert(terminator != nullptr && "the block has no terminator"); + return ImplicitLocOpBuilder(loc, block, Block::iterator(terminator), + listener); + } + + /// Accessors for the implied location. 
+ Location getLoc() const { return curLoc; } + void setLoc(Location loc) { curLoc = loc; } + + // We allow clients to use the explicit-loc version of create as well. + using OpBuilder::create; + using OpBuilder::createOrFold; + + /// Create an operation of specific op type at the current insertion point and + /// location. + template + OpTy create(Args &&... args) { + return OpBuilder::create(curLoc, std::forward(args)...); + } + + /// Create an operation of specific op type at the current insertion point, + /// and immediately try to fold it. This functions populates 'results' with + /// the results after folding the operation. + template + void createOrFold(llvm::SmallVectorImpl &results, Args &&... args) { + OpBuilder::createOrFold(results, curLoc, std::forward(args)...); + } + + /// Overload to create or fold a single result operation. + template + typename std::enable_if(), + Value>::type + createOrFold(Args &&... args) { + return OpBuilder::createOrFold(curLoc, std::forward(args)...); + } + + /// Overload to create or fold a zero result operation. + template + typename std::enable_if(), + OpTy>::type + createOrFold(Args &&... args) { + return OpBuilder::createOrFold(curLoc, std::forward(args)...); + } + + /// This builder can also be used to emit diagnostics to the current location. + mlir::InFlightDiagnostic + emitError(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitError(curLoc, message); + } + mlir::InFlightDiagnostic + emitWarning(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitWarning(curLoc, message); + } + mlir::InFlightDiagnostic + emitRemark(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitRemark(curLoc, message); + } + +private: + Location curLoc; +}; + +} // namespace mlir + +#endif // MLIR_IR_IMPLICITLOCOPBUILDER_H \ No newline at end of file diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 65545d8ab2de1..2415924557db7 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -13,7 +13,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/IR/Builders.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" @@ -112,12 +112,13 @@ struct AsyncAPI { // Adds Async Runtime C API declarations to the module. static void addAsyncRuntimeApiDeclarations(ModuleOp module) { - auto builder = OpBuilder::atBlockTerminator(module.getBody()); + auto builder = ImplicitLocOpBuilder::atBlockTerminator(module.getLoc(), + module.getBody()); auto addFuncDecl = [&](StringRef name, FunctionType type) { if (module.lookupSymbol(name)) return; - builder.create(module.getLoc(), name, type).setPrivate(); + builder.create(name, type).setPrivate(); }; MLIRContext *ctx = module.getContext(); @@ -149,13 +150,13 @@ static constexpr const char *kCoroFree = "llvm.coro.free"; static constexpr const char *kCoroResume = "llvm.coro.resume"; /// Adds an LLVM function declaration to a module. 
-static void addLLVMFuncDecl(ModuleOp module, OpBuilder &builder, StringRef name, - LLVM::LLVMType ret, +static void addLLVMFuncDecl(ModuleOp module, ImplicitLocOpBuilder &builder, + StringRef name, LLVM::LLVMType ret, ArrayRef params) { if (module.lookupSymbol(name)) return; LLVM::LLVMType type = LLVM::LLVMType::getFunctionTy(ret, params, false); - builder.create(module.getLoc(), name, type); + builder.create(name, type); } /// Adds coroutine intrinsics declarations to the module. @@ -163,7 +164,8 @@ static void addCoroutineIntrinsicsDeclarations(ModuleOp module) { using namespace mlir::LLVM; MLIRContext *ctx = module.getContext(); - OpBuilder builder(module.getBody()->getTerminator()); + ImplicitLocOpBuilder builder(module.getLoc(), + module.getBody()->getTerminator()); auto token = LLVMTokenType::get(ctx); auto voidTy = LLVMType::getVoidTy(ctx); @@ -196,7 +198,8 @@ static void addCRuntimeDeclarations(ModuleOp module) { using namespace mlir::LLVM; MLIRContext *ctx = module.getContext(); - OpBuilder builder(module.getBody()->getTerminator()); + ImplicitLocOpBuilder builder(module.getLoc(), + module.getBody()->getTerminator()); auto voidTy = LLVMType::getVoidTy(ctx); auto i64 = LLVMType::getInt64Ty(ctx); @@ -232,13 +235,13 @@ static void addResumeFunction(ModuleOp module) { resumeOp.setPrivate(); auto *block = resumeOp.addEntryBlock(); - OpBuilder blockBuilder = OpBuilder::atBlockEnd(block); + auto blockBuilder = ImplicitLocOpBuilder::atBlockEnd(loc, block); - blockBuilder.create(loc, TypeRange(), + blockBuilder.create(TypeRange(), blockBuilder.getSymbolRefAttr(kCoroResume), resumeOp.getArgument(0)); - blockBuilder.create(loc, ValueRange()); + blockBuilder.create(ValueRange()); } //===----------------------------------------------------------------------===// @@ -302,13 +305,12 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { Block *entryBlock = func.addEntryBlock(); Location loc = func.getBody().getLoc(); - OpBuilder builder = OpBuilder::atBlockBegin(entryBlock); + auto builder = ImplicitLocOpBuilder::atBlockBegin(loc, entryBlock); // ------------------------------------------------------------------------ // // Allocate async tokens/values that we will return from a ramp function. // ------------------------------------------------------------------------ // - auto createToken = - builder.create(loc, kCreateToken, TokenType::get(ctx)); + auto createToken = builder.create(kCreateToken, TokenType::get(ctx)); // ------------------------------------------------------------------------ // // Initialize coroutine: allocate frame, get coroutine handle. @@ -316,28 +318,28 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { // Constants for initializing coroutine frame. auto constZero = - builder.create(loc, i32, builder.getI32IntegerAttr(0)); + builder.create(i32, builder.getI32IntegerAttr(0)); auto constFalse = - builder.create(loc, i1, builder.getBoolAttr(false)); - auto nullPtr = builder.create(loc, i8Ptr); + builder.create(i1, builder.getBoolAttr(false)); + auto nullPtr = builder.create(i8Ptr); // Get coroutine id: @llvm.coro.id auto coroId = builder.create( - loc, token, builder.getSymbolRefAttr(kCoroId), + token, builder.getSymbolRefAttr(kCoroId), ValueRange({constZero, nullPtr, nullPtr, nullPtr})); // Get coroutine frame size: @llvm.coro.size.i64 auto coroSize = builder.create( - loc, i64, builder.getSymbolRefAttr(kCoroSizeI64), ValueRange()); + i64, builder.getSymbolRefAttr(kCoroSizeI64), ValueRange()); // Allocate memory for coroutine frame. 
- auto coroAlloc = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kMalloc), - ValueRange(coroSize.getResult(0))); + auto coroAlloc = + builder.create(i8Ptr, builder.getSymbolRefAttr(kMalloc), + ValueRange(coroSize.getResult(0))); // Begin a coroutine: @llvm.coro.begin auto coroHdl = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kCoroBegin), + i8Ptr, builder.getSymbolRefAttr(kCoroBegin), ValueRange({coroId.getResult(0), coroAlloc.getResult(0)})); Block *cleanupBlock = func.addBlock(); @@ -350,15 +352,14 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { // Get a pointer to the coroutine frame memory: @llvm.coro.free. auto coroMem = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kCoroFree), + i8Ptr, builder.getSymbolRefAttr(kCoroFree), ValueRange({coroId.getResult(0), coroHdl.getResult(0)})); // Free the memory. - builder.create(loc, TypeRange(), - builder.getSymbolRefAttr(kFree), + builder.create(TypeRange(), builder.getSymbolRefAttr(kFree), ValueRange(coroMem.getResult(0))); // Branch into the suspend block. - builder.create(loc, suspendBlock); + builder.create(suspendBlock); // ------------------------------------------------------------------------ // // Coroutine suspend block: mark the end of a coroutine and return allocated @@ -367,17 +368,17 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { builder.setInsertionPointToStart(suspendBlock); // Mark the end of a coroutine: @llvm.coro.end. - builder.create(loc, i1, builder.getSymbolRefAttr(kCoroEnd), + builder.create(i1, builder.getSymbolRefAttr(kCoroEnd), ValueRange({coroHdl.getResult(0), constFalse})); // Return created `async.token` from the suspend block. This will be the // return value of a coroutine ramp function. - builder.create(loc, createToken.getResult(0)); + builder.create(createToken.getResult(0)); // Branch from the entry block to the cleanup block to create a valid CFG. builder.setInsertionPointToEnd(entryBlock); - builder.create(loc, cleanupBlock); + builder.create(cleanupBlock); // `async.await` op lowering will create resume blocks for async // continuations, and will conditionally branch to cleanup or suspend blocks. @@ -471,8 +472,6 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { MLIRContext *ctx = module.getContext(); Location loc = execute.getLoc(); - OpBuilder moduleBuilder(module.getBody()->getTerminator()); - // Collect all outlined function inputs. llvm::SetVector functionInputs(execute.dependencies().begin(), execute.dependencies().end()); @@ -484,13 +483,13 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { SmallVector inputTypes(typesRange.begin(), typesRange.end()); auto outputTypes = execute.getResultTypes(); - auto funcType = moduleBuilder.getFunctionType(inputTypes, outputTypes); + auto funcType = FunctionType::get(ctx, inputTypes, outputTypes); auto funcAttrs = ArrayRef(); // TODO: Derive outlined function name from the parent FuncOp (support // multiple nested async.execute operations). FuncOp func = FuncOp::create(loc, kAsyncFnPrefix, funcType, funcAttrs); - symbolTable.insert(func, moduleBuilder.getInsertionPoint()); + symbolTable.insert(func, Block::iterator(module.getBody()->getTerminator())); SymbolTable::setSymbolVisibility(func, SymbolTable::Visibility::Private); @@ -502,21 +501,21 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // Async execute API (execution will be resumed in a thread managed by the // async runtime). 
Block *entryBlock = &func.getBlocks().front(); - OpBuilder builder = OpBuilder::atBlockTerminator(entryBlock); + auto builder = ImplicitLocOpBuilder::atBlockTerminator(loc, entryBlock); // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); + LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( - loc, LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), + LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), ValueRange({coro.coroHandle})); // Call async runtime API to execute a coroutine in the managed thread. SmallVector executeArgs = {coro.coroHandle, resumePtr.res()}; - builder.create(loc, TypeRange(), kExecute, executeArgs); + builder.create(TypeRange(), kExecute, executeArgs); // Split the entry block before the terminator. auto *terminatorOp = entryBlock->getTerminator(); @@ -528,7 +527,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // Await on all dependencies before starting to execute the body region. builder.setInsertionPointToStart(resume); for (size_t i = 0; i < execute.dependencies().size(); ++i) - builder.create(loc, func.getArgument(i)); + builder.create(func.getArgument(i)); // Map from function inputs defined above the execute op to the function // arguments. @@ -540,17 +539,16 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // to async runtime to emplace the result token. for (Operation &op : execute.body().getOps()) { if (isa(op)) { - builder.create(loc, kEmplaceToken, TypeRange(), coro.asyncToken); + builder.create(kEmplaceToken, TypeRange(), coro.asyncToken); continue; } builder.clone(op, valueMapping); } // Replace the original `async.execute` with a call to outlined function. - OpBuilder callBuilder(execute); - auto callOutlinedFunc = - callBuilder.create(loc, func.getName(), execute.getResultTypes(), - functionInputs.getArrayRef()); + ImplicitLocOpBuilder callBuilder(loc, execute); + auto callOutlinedFunc = callBuilder.create( + func.getName(), execute.getResultTypes(), functionInputs.getArrayRef()); execute.replaceAllUsesWith(callOutlinedFunc.getResults()); execute.erase(); @@ -744,24 +742,24 @@ class AwaitOpLoweringBase : public ConversionPattern { if (isInCoroutine) { const CoroMachinery &coro = outlined->getSecond(); - OpBuilder builder(op, rewriter.getListener()); + ImplicitLocOpBuilder builder(loc, op, rewriter.getListener()); MLIRContext *ctx = op->getContext(); // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); + LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( - loc, LLVM::LLVMTokenType::get(ctx), - builder.getSymbolRefAttr(kCoroSave), ValueRange(coro.coroHandle)); + LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), + ValueRange(coro.coroHandle)); // Call async runtime API to resume a coroutine in the managed thread when // the async await argument becomes ready. 
SmallVector awaitAndExecuteArgs = {operands[0], coro.coroHandle, resumePtr.res()}; - builder.create(loc, TypeRange(), coroAwaitFuncName, + builder.create(TypeRange(), coroAwaitFuncName, awaitAndExecuteArgs); Block *suspended = op->getBlock(); From 6dfe5801e01d259d00198147b27438ffea39e59f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 15 Dec 2020 14:26:10 -0800 Subject: [PATCH 137/378] scudo: Move the configuration for the primary allocator to Config. NFCI. This will allow the primary and secondary allocators to share the MaySupportMemoryTagging bool. Differential Revision: https://reviews.llvm.org/D93728 --- .../lib/scudo/standalone/allocator_config.h | 51 ++++++++++----- compiler-rt/lib/scudo/standalone/memtag.h | 5 ++ compiler-rt/lib/scudo/standalone/primary32.h | 28 ++++----- compiler-rt/lib/scudo/standalone/primary64.h | 25 +++----- .../scudo/standalone/tests/combined_test.cpp | 17 +++-- .../scudo/standalone/tests/primary_test.cpp | 62 ++++++++++++++----- 6 files changed, 117 insertions(+), 71 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h index 8f1757dab3227..12daaa2f6b440 100644 --- a/compiler-rt/lib/scudo/standalone/allocator_config.h +++ b/compiler-rt/lib/scudo/standalone/allocator_config.h @@ -25,13 +25,18 @@ namespace scudo { struct DefaultConfig { using SizeClassMap = DefaultSizeClassMap; + static const bool MaySupportMemoryTagging = false; + #if SCUDO_CAN_USE_PRIMARY64 - // 1GB Regions - typedef SizeClassAllocator64 Primary; + typedef SizeClassAllocator64 Primary; + static const uptr PrimaryRegionSizeLog = 30U; #else - // 512KB regions - typedef SizeClassAllocator32 Primary; + typedef SizeClassAllocator32 Primary; + static const uptr PrimaryRegionSizeLog = 19U; #endif + static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + typedef MapAllocatorCache SecondaryCache; static const u32 SecondaryCacheEntriesArraySize = 32U; static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; @@ -44,15 +49,18 @@ struct DefaultConfig { struct AndroidConfig { using SizeClassMap = AndroidSizeClassMap; + static const bool MaySupportMemoryTagging = true; + #if SCUDO_CAN_USE_PRIMARY64 - // 256MB regions - typedef SizeClassAllocator64 - Primary; + typedef SizeClassAllocator64 Primary; + static const uptr PrimaryRegionSizeLog = 28U; #else - // 256KB regions - typedef SizeClassAllocator32 Primary; + typedef SizeClassAllocator32 Primary; + static const uptr PrimaryRegionSizeLog = 18U; #endif + static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; + static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; + typedef MapAllocatorCache SecondaryCache; static const u32 SecondaryCacheEntriesArraySize = 256U; static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; @@ -66,13 +74,18 @@ struct AndroidConfig { struct AndroidSvelteConfig { using SizeClassMap = SvelteSizeClassMap; + static const bool MaySupportMemoryTagging = false; + #if SCUDO_CAN_USE_PRIMARY64 - // 128MB regions - typedef SizeClassAllocator64 Primary; + typedef SizeClassAllocator64 Primary; + static const uptr PrimaryRegionSizeLog = 27U; #else - // 64KB regions - typedef SizeClassAllocator32 Primary; + typedef SizeClassAllocator32 Primary; + static const uptr PrimaryRegionSizeLog = 16U; #endif + static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; + static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; + typedef MapAllocatorCache SecondaryCache; static const 
u32 SecondaryCacheEntriesArraySize = 16U; static const u32 SecondaryCacheDefaultMaxEntriesCount = 4U; @@ -86,8 +99,14 @@ struct AndroidSvelteConfig { #if SCUDO_CAN_USE_PRIMARY64 struct FuchsiaConfig { - // 1GB Regions - typedef SizeClassAllocator64 Primary; + using SizeClassMap = DefaultSizeClassMap; + static const bool MaySupportMemoryTagging = false; + + typedef SizeClassAllocator64 Primary; + static const uptr PrimaryRegionSizeLog = 30U; + static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + typedef MapAllocatorNoCache SecondaryCache; template using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs. diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index c3c4f574b4fc9..d848e3a0b6c9b 100644 --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -268,6 +268,11 @@ inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, *TaggedEnd = storeTags(*TaggedBegin, *TaggedBegin + Size); } +template +inline constexpr bool allocatorSupportsMemoryTagging() { + return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging; +} + } // namespace scudo #endif diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index 0db95d2e1f113..c744670b43926 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -39,20 +39,15 @@ namespace scudo { // Memory used by this allocator is never unmapped but can be partially // reclaimed if the platform allows for it. -template -class SizeClassAllocator32 { +template class SizeClassAllocator32 { public: - typedef SizeClassMapT SizeClassMap; + typedef typename Config::SizeClassMap SizeClassMap; // The bytemap can only track UINT8_MAX - 1 classes. static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), ""); // Regions should be large enough to hold the largest Block. - static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, ""); - typedef SizeClassAllocator32 - ThisT; + static_assert((1UL << Config::PrimaryRegionSizeLog) >= SizeClassMap::MaxSize, + ""); + typedef SizeClassAllocator32 ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; static const bool SupportsMemoryTagging = false; @@ -199,9 +194,9 @@ class SizeClassAllocator32 { bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { - const s32 Interval = - Max(Min(static_cast(Value), MaxReleaseToOsIntervalMs), - MinReleaseToOsIntervalMs); + const s32 Interval = Max( + Min(static_cast(Value), Config::PrimaryMaxReleaseToOsIntervalMs), + Config::PrimaryMinReleaseToOsIntervalMs); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } @@ -236,8 +231,9 @@ class SizeClassAllocator32 { private: static const uptr NumClasses = SizeClassMap::NumClasses; - static const uptr RegionSize = 1UL << RegionSizeLog; - static const uptr NumRegions = SCUDO_MMAP_RANGE_SIZE >> RegionSizeLog; + static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; + static const uptr NumRegions = + SCUDO_MMAP_RANGE_SIZE >> Config::PrimaryRegionSizeLog; static const u32 MaxNumBatches = SCUDO_ANDROID ? 
4U : 8U; typedef FlatByteMap ByteMap; @@ -270,7 +266,7 @@ class SizeClassAllocator32 { static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr computeRegionId(uptr Mem) { - const uptr Id = Mem >> RegionSizeLog; + const uptr Id = Mem >> Config::PrimaryRegionSizeLog; CHECK_LT(Id, NumRegions); return Id; } diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index f9854cbfd4d68..df1310aa8e959 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -40,21 +40,14 @@ namespace scudo { // The memory used by this allocator is never unmapped, but can be partially // released if the platform allows for it. -template -class SizeClassAllocator64 { +template class SizeClassAllocator64 { public: - typedef SizeClassMapT SizeClassMap; - typedef SizeClassAllocator64< - SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs, - MaxReleaseToOsIntervalMs, MaySupportMemoryTagging> - ThisT; + typedef typename Config::SizeClassMap SizeClassMap; + typedef SizeClassAllocator64 ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; static const bool SupportsMemoryTagging = - MaySupportMemoryTagging && archSupportsMemoryTagging(); + allocatorSupportsMemoryTagging(); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -178,9 +171,9 @@ class SizeClassAllocator64 { bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { - const s32 Interval = - Max(Min(static_cast(Value), MaxReleaseToOsIntervalMs), - MinReleaseToOsIntervalMs); + const s32 Interval = Max( + Min(static_cast(Value), Config::PrimaryMaxReleaseToOsIntervalMs), + Config::PrimaryMinReleaseToOsIntervalMs); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } @@ -258,7 +251,7 @@ class SizeClassAllocator64 { AtomicOptions Options; private: - static const uptr RegionSize = 1UL << RegionSizeLog; + static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr PrimarySize = RegionSize * NumClasses; @@ -308,7 +301,7 @@ class SizeClassAllocator64 { } uptr getRegionBaseByClassId(uptr ClassId) const { - return PrimaryBase + (ClassId << RegionSizeLog); + return PrimaryBase + (ClassId << Config::PrimaryRegionSizeLog); } NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index b0ab0244e877f..53874933e7ac3 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -401,10 +401,15 @@ struct DeathSizeClassConfig { static const scudo::uptr DeathRegionSizeLog = 20U; struct DeathConfig { + static const bool MaySupportMemoryTagging = false; + // Tiny allocator, its Primary only serves chunks of four sizes. 
- using DeathSizeClassMap = scudo::FixedSizeClassMap; - typedef scudo::SizeClassAllocator64 - Primary; + using SizeClassMap = scudo::FixedSizeClassMap; + typedef scudo::SizeClassAllocator64 Primary; + static const scudo::uptr PrimaryRegionSizeLog = DeathRegionSizeLog; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::MapAllocatorNoCache SecondaryCache; template using TSDRegistryT = scudo::TSDRegistrySharedT; }; @@ -460,13 +465,13 @@ TEST(ScudoCombinedTest, FullRegion) { std::vector V; scudo::uptr FailedAllocationsCount = 0; for (scudo::uptr ClassId = 1U; - ClassId <= DeathConfig::DeathSizeClassMap::LargestClassId; ClassId++) { + ClassId <= DeathConfig::SizeClassMap::LargestClassId; ClassId++) { const scudo::uptr Size = - DeathConfig::DeathSizeClassMap::getSizeByClassId(ClassId); + DeathConfig::SizeClassMap::getSizeByClassId(ClassId); // Allocate enough to fill all of the regions above this one. const scudo::uptr MaxNumberOfChunks = ((1U << DeathRegionSizeLog) / Size) * - (DeathConfig::DeathSizeClassMap::LargestClassId - ClassId + 1); + (DeathConfig::SizeClassMap::LargestClassId - ClassId + 1); void *P; for (scudo::uptr I = 0; I <= MaxNumberOfChunks; I++) { P = Allocator->allocate(Size - 64U, Origin); diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 67d1fe52acef9..eed64314cc3d0 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -52,20 +52,51 @@ template static void testPrimary() { Str.output(); } +template struct TestConfig1 { + using SizeClassMap = SizeClassMapT; + static const scudo::uptr PrimaryRegionSizeLog = 18U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = false; +}; + +template struct TestConfig2 { + using SizeClassMap = SizeClassMapT; + static const scudo::uptr PrimaryRegionSizeLog = 24U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = false; +}; + +template struct TestConfig3 { + using SizeClassMap = SizeClassMapT; + static const scudo::uptr PrimaryRegionSizeLog = 24U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = true; +}; + TEST(ScudoPrimaryTest, BasicPrimary) { using SizeClassMap = scudo::DefaultSizeClassMap; #if !SCUDO_FUCHSIA - testPrimary>(); + testPrimary>>(); #endif - testPrimary>(); - testPrimary>(); + testPrimary>>(); + testPrimary>>(); } +struct SmallRegionsConfig { + using SizeClassMap = scudo::DefaultSizeClassMap; + static const scudo::uptr PrimaryRegionSizeLog = 20U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = false; +}; + // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes. // For the 32-bit one, it requires actually exhausting memory, so we skip it. 
TEST(ScudoPrimaryTest, Primary64OOM) { - using Primary = scudo::SizeClassAllocator64; + using Primary = scudo::SizeClassAllocator64; using TransferBatch = Primary::CacheT::TransferBatch; Primary Allocator; Allocator.init(/*ReleaseToOsInterval=*/-1); @@ -142,11 +173,10 @@ template static void testIteratePrimary() { TEST(ScudoPrimaryTest, PrimaryIterate) { using SizeClassMap = scudo::DefaultSizeClassMap; #if !SCUDO_FUCHSIA - testIteratePrimary>(); + testIteratePrimary>>(); #endif - testIteratePrimary>(); - testIteratePrimary>(); + testIteratePrimary>>(); + testIteratePrimary>>(); } static std::mutex Mutex; @@ -204,11 +234,10 @@ template static void testPrimaryThreaded() { TEST(ScudoPrimaryTest, PrimaryThreaded) { using SizeClassMap = scudo::SvelteSizeClassMap; #if !SCUDO_FUCHSIA - testPrimaryThreaded>(); + testPrimaryThreaded>>(); #endif - testPrimaryThreaded>(); - testPrimaryThreaded>(); + testPrimaryThreaded>>(); + testPrimaryThreaded>>(); } // Through a simple allocation that spans two pages, verify that releaseToOS @@ -236,9 +265,8 @@ template static void testReleaseToOS() { TEST(ScudoPrimaryTest, ReleaseToOS) { using SizeClassMap = scudo::DefaultSizeClassMap; #if !SCUDO_FUCHSIA - testReleaseToOS>(); + testReleaseToOS>>(); #endif - testReleaseToOS>(); - testReleaseToOS>(); + testReleaseToOS>>(); + testReleaseToOS>>(); } From ca4bf58e4ee5951473a861716193063c5ef83e9a Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 21 Dec 2020 15:40:46 -0800 Subject: [PATCH 138/378] [AMDGPU] Support unaligned flat scratch in TLI Adjust SITargetLowering::allowsMisalignedMemoryAccessesImpl for unaligned flat scratch support. Mostly needed for global isel. Differential Revision: https://reviews.llvm.org/D93669 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 +++- llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll | 11 +-- .../CodeGen/AMDGPU/unaligned-load-store.ll | 69 ++++++++++--------- .../AMDGPU/adjust-alloca-alignment.ll | 35 +++------- 4 files changed, 61 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5fb1924bdd9fc..81fdfa0343b33 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1470,12 +1470,21 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( } } + if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { + bool AlignedBy4 = Alignment >= Align(4); + if (IsFast) + *IsFast = AlignedBy4; + + return AlignedBy4 || + Subtarget->enableFlatScratch() || + Subtarget->hasUnalignedScratchAccess(); + } + // FIXME: We have to be conservative here and assume that flat operations // will access scratch. If we had access to the IR function, then we // could determine if any private memory was used in the function. 
- if (!Subtarget->hasUnalignedScratchAccess() && - (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || - AddrSpace == AMDGPUAS::FLAT_ADDRESS)) { + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS && + !Subtarget->hasUnalignedScratchAccess()) { bool AlignedBy4 = Alignment >= Align(4); if (IsFast) *IsFast = AlignedBy4; diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index 271f6c703980a..8e37b413ddf58 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -271,16 +271,9 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly % ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_short off, v0, vcc_hi offset:8 ; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_load_ushort v0, off, vcc_hi offset:4 +; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 ; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_load_ushort v3, off, vcc_hi offset:6 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: s_waitcnt vmcnt(1) -; FLATSCR-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: v_mov_b32_e32 v1, v3 -; FLATSCR-NEXT: scratch_load_short_d16_hi v1, off, vcc_hi offset:8 -; FLATSCR-NEXT: v_lshl_or_b32 v0, v3, 16, v0 +; FLATSCR-NEXT: scratch_load_dword v1, off, vcc_hi offset:6 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; FLATSCR-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll index 5d5cfd318edfb..645eead8c2971 100644 --- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll +++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -1,6 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s +; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefixes=SI,MUBUF,ALIGNED %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefixes=SI,MUBUF,UNALIGNED %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefixes=SI,MUBUF,ALIGNED %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FLATSCR,ALIGNED %s ; SI-LABEL: {{^}}local_unaligned_load_store_i16: ; SI: ds_read_u8 @@ -602,64 +603,70 @@ define amdgpu_kernel void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out } ; SI-LABEL: {{^}}private_load_align1_f64: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; MUBUF: buffer_load_ubyte +; FLATSCR: scratch_load_dwordx2 define double @private_load_align1_f64(double addrspace(5)* %in) { %x = load double, double addrspace(5)* %in, align 1 ret double %x } ; SI-LABEL: 
{{^}}private_store_align1_f64: -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte +; MUBUF: buffer_store_byte +; MUBUF: buffer_store_byte +; MUBUF: buffer_store_byte +; MUBUF: buffer_store_byte +; MUBUF: buffer_store_byte +; MUBUF: buffer_store_byte +; MUBUF: buffer_store_byte +; MUBUF: buffer_store_byte +; FLATSCR: scratch_store_dwordx2 define void @private_store_align1_f64(double addrspace(5)* %out, double %x) #0 { store double %x, double addrspace(5)* %out, align 1 ret void } ; SI-LABEL: {{^}}private_load_align4_f64: -; SI: buffer_load_dword -; SI: buffer_load_dword +; MUBUF: buffer_load_dword +; MUBUF: buffer_load_dword +; FLATSCR: scratch_load_dwordx2 define double @private_load_align4_f64(double addrspace(5)* %in) { %x = load double, double addrspace(5)* %in, align 4 ret double %x } ; SI-LABEL: {{^}}private_store_align4_f64: -; SI: buffer_store_dword -; SI: buffer_store_dword +; MUBUF: buffer_store_dword +; MUBUF: buffer_store_dword +; FLATSCR: scratch_store_dwordx2 define void @private_store_align4_f64(double addrspace(5)* %out, double %x) #0 { store double %x, double addrspace(5)* %out, align 4 ret void } ; SI-LABEL: {{^}}private_load_align2_f64: -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort +; MUBUF: buffer_load_ushort +; MUBUF: buffer_load_ushort +; MUBUF: buffer_load_ushort +; MUBUF: buffer_load_ushort +; FLATSCR: scratch_load_dwordx2 define double @private_load_align2_f64(double addrspace(5)* %in) { %x = load double, double addrspace(5)* %in, align 2 ret double %x } ; SI-LABEL: {{^}}private_store_align2_f64: -; SI: buffer_store_short -; SI: buffer_store_short -; SI: buffer_store_short -; SI: buffer_store_short +; MUBUF: buffer_store_short +; MUBUF: buffer_store_short +; MUBUF: buffer_store_short +; MUBUF: buffer_store_short +; FLATSCR: scratch_store_dwordx2 define void @private_store_align2_f64(double addrspace(5)* %out, double %x) #0 { store double %x, double addrspace(5)* %out, align 2 ret void diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll index a46f4d4175b7c..5bbacbf6a034c 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll @@ -9,9 +9,6 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 ; ALL-LABEL: @load_unknown_offset_align1_i8( ; ALL: alloca [128 x i8], align 1 ; UNALIGNED: load <2 x i8>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}} - -; ALIGNED: load i8, i8 addrspace(5)* %ptr0, align 1{{$}} -; ALIGNED: load i8, i8 addrspace(5)* %ptr1, align 1{{$}} define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i8], align 1, addrspace(5) %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset @@ -60,13 +57,11 @@ define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noal ret void } -; FIXME: Should always increase alignment of the load ; Make sure alloca alignment isn't decreased ; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32( ; ALL: alloca [128 x i32], align 16 -; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}} -; ALIGNED: load <2 x i32>, 
<2 x i32> addrspace(5)* %{{[0-9]+}}, align 4{{$}} +; ALL: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 4{{$}} define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i32], align 16, addrspace(5) %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset @@ -128,11 +123,8 @@ define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noa } ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32( -; ALIGNED: %alloca = alloca [8 x i32], align 4, addrspace(5) -; ALIGNED: store <4 x i32> , <4 x i32> addrspace(5)* %1, align 4 - -; UNALIGNED: %alloca = alloca [8 x i32], align 1, addrspace(5) -; UNALIGNED: store <4 x i32> , <4 x i32> addrspace(5)* %1, align 1 +; ALL: %alloca = alloca [8 x i32], align 4, addrspace(5) +; ALL: store <4 x i32> , <4 x i32> addrspace(5)* %1, align 4 define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32() { %alloca = alloca [8 x i32], align 1, addrspace(5) %out = bitcast [8 x i32] addrspace(5)* %alloca to i32 addrspace(5)* @@ -148,11 +140,8 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32() { } ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8( -; ALIGNED: %alloca = alloca [8 x i8], align 4, addrspace(5) -; ALIGNED: store <4 x i8> , <4 x i8> addrspace(5)* %1, align 4 - -; UNALIGNED: %alloca = alloca [8 x i8], align 1, addrspace(5) -; UNALIGNED: store <4 x i8> , <4 x i8> addrspace(5)* %1, align 1 +; ALL: %alloca = alloca [8 x i8], align 4, addrspace(5) +; ALL: store <4 x i8> , <4 x i8> addrspace(5)* %1, align 4 define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8() { %alloca = alloca [8 x i8], align 1, addrspace(5) %out = bitcast [8 x i8] addrspace(5)* %alloca to i8 addrspace(5)* @@ -168,11 +157,8 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8() { } ; ALL-LABEL: @merge_private_load_4_vector_elts_loads_v4i32( -; ALIGNED: %alloca = alloca [8 x i32], align 4, addrspace(5) -; ALIGNED: load <4 x i32>, <4 x i32> addrspace(5)* %1, align 4 - -; UNALIGNED: %alloca = alloca [8 x i32], align 1, addrspace(5) -; UNALIGNED: load <4 x i32>, <4 x i32> addrspace(5)* %1, align 1 +; ALL: %alloca = alloca [8 x i32], align 4, addrspace(5) +; ALL: load <4 x i32>, <4 x i32> addrspace(5)* %1, align 4 define amdgpu_kernel void @merge_private_load_4_vector_elts_loads_v4i32() { %alloca = alloca [8 x i32], align 1, addrspace(5) %out = bitcast [8 x i32] addrspace(5)* %alloca to i32 addrspace(5)* @@ -188,11 +174,8 @@ define amdgpu_kernel void @merge_private_load_4_vector_elts_loads_v4i32() { } ; ALL-LABEL: @merge_private_load_4_vector_elts_loads_v4i8( -; ALIGNED: %alloca = alloca [8 x i8], align 4, addrspace(5) -; ALIGNED: load <4 x i8>, <4 x i8> addrspace(5)* %1, align 4 - -; UNALIGNED: %alloca = alloca [8 x i8], align 1, addrspace(5) -; UNALIGNED: load <4 x i8>, <4 x i8> addrspace(5)* %1, align 1 +; ALL: %alloca = alloca [8 x i8], align 4, addrspace(5) +; ALL: load <4 x i8>, <4 x i8> addrspace(5)* %1, align 4 define amdgpu_kernel void @merge_private_load_4_vector_elts_loads_v4i8() { %alloca = alloca [8 x i8], align 1, addrspace(5) %out = bitcast [8 x i8] addrspace(5)* %alloca to i8 addrspace(5)* From d15119a02d92274cd7f779f4bb8485b1020110e0 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 21 Dec 2020 12:31:06 -0800 Subject: [PATCH 139/378] [AMDGPU][GlobalISel] GlobalISel for flat scratch It does not seem to fold offsets but this 
is not specific to the flat scratch as getPtrBaseWithConstantOffset() does not return the split for these tests unlike its SDag counterpart. Differential Revision: https://reviews.llvm.org/D93670 --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 61 ++ .../Target/AMDGPU/AMDGPUInstructionSelector.h | 3 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +- .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 759 ++++++++++++++++++ 5 files changed, 832 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 661b96a6a98e2..bba03736d01ae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -85,6 +85,14 @@ def gi_mubuf_scratch_offen : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_flat_scratch_offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_flat_scratch_saddr : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_ds_1addr_1offset : GIComplexOperandMatcher, GIComplexPatternEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index b157c03672d12..6c2ff0972ae5b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3589,6 +3589,67 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const { }}}; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const { + Register Addr = Root.getReg(); + Register PtrBase; + int64_t ConstOffset; + int64_t ImmOffset = 0; + + // Match the immediate offset first, which canonically is moved as low as + // possible. 
+ std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI); + + if (ConstOffset != 0 && + TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) { + Addr = PtrBase; + ImmOffset = ConstOffset; + } + + auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); + if (!AddrDef) + return None; + + if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) { + int FI = AddrDef->MI->getOperand(1).getIndex(); + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr + [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset + }}; + } + + Register SAddr = AddrDef->Reg; + + if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) { + Register LHS = AddrDef->MI->getOperand(1).getReg(); + Register RHS = AddrDef->MI->getOperand(2).getReg(); + auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI); + auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI); + + if (LHSDef && RHSDef && + LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX && + isSGPR(RHSDef->Reg)) { + int FI = LHSDef->MI->getOperand(1).getIndex(); + MachineInstr &I = *Root.getParent(); + MachineBasicBlock *BB = I.getParent(); + const DebugLoc &DL = I.getDebugLoc(); + SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + + BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), SAddr) + .addFrameIndex(FI) + .addReg(RHSDef->Reg); + } + } + + if (!isSGPR(SAddr)) + return None; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, // saddr + [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset + }}; +} + static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { auto PSV = PtrInfo.V.dyn_cast(); return PSV && PSV->isStack(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index c575e7e9c8a5d..c6b26ea706596 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -200,6 +200,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector { InstructionSelector::ComplexRendererFns selectGlobalSAddr(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectScratchSAddr(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectMUBUFScratchOffen(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 9b39b86ae28fc..28cd867d40be2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -240,7 +240,7 @@ static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS, switch (AS) { case AMDGPUAS::PRIVATE_ADDRESS: // FIXME: Private element size. - return 32; + return ST.enableFlatScratch() ? 128 : 32; case AMDGPUAS::LOCAL_ADDRESS: return ST.useDS128() ? 
128 : 64; case AMDGPUAS::GLOBAL_ADDRESS: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll new file mode 100644 index 0000000000000..2fe0c29e54de6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -0,0 +1,759 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -mattr=-promote-alloca -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { +; GFX9-LABEL: store_load_sindex_kernel: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_lshl_b32 s1, s0, 2 +; GFX9-NEXT: s_and_b32 s0, s0, 15 +; GFX9-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-NEXT: s_add_u32 s1, 4, s1 +; GFX9-NEXT: scratch_store_dword off, v0, s1 +; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_sindex_kernel: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v0, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_and_b32 s1, s0, 15 +; GFX10-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-NEXT: s_lshl_b32 s1, s1, 2 +; GFX10-NEXT: s_add_u32 s0, 4, s0 +; GFX10-NEXT: s_add_u32 s1, 4, s1 +; GFX10-NEXT: scratch_store_dword off, v0, s0 +; GFX10-NEXT: scratch_load_dword v0, off, s1 +; GFX10-NEXT: s_endpgm +bb: + %i = alloca [32 x float], align 4, addrspace(5) + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = and i32 %idx, 15 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define amdgpu_kernel void @store_load_vindex_kernel() { +; GFX9-LABEL: store_load_vindex_kernel: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 +; GFX9-NEXT: v_mov_b32_e32 v2, 4 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: scratch_store_dword v1, v3, off +; GFX9-NEXT: v_add_u32_e32 v0, 0x7c, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_vindex_kernel: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s0, s0, s3 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 +; 
GFX10-NEXT: v_mov_b32_e32 v2, 4 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v2, 15 +; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x7c, v1 +; GFX10-NEXT: scratch_store_dword v0, v2, off +; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: s_endpgm +bb: + %i = alloca [32 x float], align 4, addrspace(5) + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() + %i3 = zext i32 %i2 to i64 + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i2 + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = sub nsw i32 31, %i2 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define void @store_load_vindex_foo(i32 %idx) { +; GFX9-LABEL: store_load_vindex_foo: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s32 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: scratch_store_dword v1, v3, off +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_vindex_foo: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, s32 +; GFX10-NEXT: v_mov_b32_e32 v3, 15 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + %i = alloca [32 x float], align 4, addrspace(5) + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = and i32 %idx, 15 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define void @private_ptr_foo(float addrspace(5)* nocapture %arg) { +; GFX9-LABEL: private_ptr_foo: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x41200000 +; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: private_ptr_foo: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000 +; GFX10-NEXT: scratch_store_dword v0, v1, off 
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1 + store float 1.000000e+01, float addrspace(5)* %gep, align 4 + ret void +} + +define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { +; GFX9-LABEL: store_load_sindex_small_offset_kernel: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-NEXT: s_add_u32 s2, 4, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_lshl_b32 s1, s0, 2 +; GFX9-NEXT: s_and_b32 s0, s0, 15 +; GFX9-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-NEXT: s_add_u32 s1, 0x104, s1 +; GFX9-NEXT: scratch_load_dword v1, off, s2 +; GFX9-NEXT: scratch_store_dword off, v0, s1 +; GFX9-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_sindex_small_offset_kernel: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: s_add_u32 s1, 4, 0 +; GFX10-NEXT: scratch_load_dword v0, off, s1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_and_b32 s1, s0, 15 +; GFX10-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-NEXT: s_lshl_b32 s1, s1, 2 +; GFX10-NEXT: s_add_u32 s0, 0x104, s0 +; GFX10-NEXT: s_add_u32 s1, 0x104, s1 +; GFX10-NEXT: scratch_store_dword off, v0, s0 +; GFX10-NEXT: scratch_load_dword v0, off, s1 +; GFX10-NEXT: s_endpgm +bb: + %padding = alloca [64 x i32], align 4, addrspace(5) + %i = alloca [32 x float], align 4, addrspace(5) + %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef + %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = and i32 %idx, 15 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { +; GFX9-LABEL: store_load_vindex_small_offset_kernel: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 s0, 4, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x104 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: scratch_store_dword v1, v3, off +; GFX9-NEXT: v_add_u32_e32 v0, 0x7c, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_vindex_small_offset_kernel: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s0, s0, s3 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 
hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0x104 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: s_add_u32 s0, 4, 0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: scratch_load_dword v3, off, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v2, 15 +; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x7c, v1 +; GFX10-NEXT: scratch_store_dword v0, v2, off +; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: s_endpgm +bb: + %padding = alloca [64 x i32], align 4, addrspace(5) + %i = alloca [32 x float], align 4, addrspace(5) + %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef + %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() + %i3 = zext i32 %i2 to i64 + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i2 + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = sub nsw i32 31, %i2 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define void @store_load_vindex_small_offset_foo(i32 %idx) { +; GFX9-LABEL: store_load_vindex_small_offset_foo: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_u32 s0, s32, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 +; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: scratch_store_dword v1, v3, off +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_vindex_small_offset_foo: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x100 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v3, 15 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: s_add_u32 s0, s32, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX10-NEXT: scratch_load_dword v2, off, s0 +; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + %padding = alloca [64 x i32], align 4, addrspace(5) + %i = alloca [32 x float], align 4, addrspace(5) + %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef + %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx + %i8 = bitcast float addrspace(5)* 
%i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = and i32 %idx, 15 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { +; GFX9-LABEL: store_load_sindex_large_offset_kernel: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-NEXT: s_add_u32 s2, 4, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_lshl_b32 s1, s0, 2 +; GFX9-NEXT: s_and_b32 s0, s0, 15 +; GFX9-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX9-NEXT: scratch_load_dword v1, off, s2 +; GFX9-NEXT: scratch_store_dword off, v0, s1 +; GFX9-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_sindex_large_offset_kernel: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: s_add_u32 s1, 4, 0 +; GFX10-NEXT: scratch_load_dword v0, off, s1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_and_b32 s1, s0, 15 +; GFX10-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-NEXT: s_lshl_b32 s1, s1, 2 +; GFX10-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX10-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX10-NEXT: scratch_store_dword off, v0, s0 +; GFX10-NEXT: scratch_load_dword v0, off, s1 +; GFX10-NEXT: s_endpgm +bb: + %padding = alloca [4096 x i32], align 4, addrspace(5) + %i = alloca [32 x float], align 4, addrspace(5) + %pad_gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef + %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = and i32 %idx, 15 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { +; GFX9-LABEL: store_load_vindex_large_offset_kernel: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 s0, 4, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4004 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: scratch_store_dword v1, v3, off +; GFX9-NEXT: v_add_u32_e32 v0, 0x7c, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_vindex_large_offset_kernel: +; GFX10: ; 
%bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s0, s0, s3 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0x4004 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: s_add_u32 s0, 4, 0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: scratch_load_dword v3, off, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v2, 15 +; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x7c, v1 +; GFX10-NEXT: scratch_store_dword v0, v2, off +; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: s_endpgm +bb: + %padding = alloca [4096 x i32], align 4, addrspace(5) + %i = alloca [32 x float], align 4, addrspace(5) + %pad_gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef + %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() + %i3 = zext i32 %i2 to i64 + %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i2 + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = sub nsw i32 31, %i2 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define void @store_load_vindex_large_offset_foo(i32 %idx) { +; GFX9-LABEL: store_load_vindex_large_offset_foo: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_u32 s0, s32, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 +; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: scratch_store_dword v1, v3, off +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_vindex_large_offset_foo: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v3, 15 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: s_add_u32 s0, s32, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX10-NEXT: scratch_load_dword v2, off, s0 +; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + %padding = alloca [4096 x i32], align 4, addrspace(5) + %i = alloca [32 x float], align 4, addrspace(5) + %pad_gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef + %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 + %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* 
+ %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx + %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* + store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i9 = and i32 %idx, 15 + %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 + %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* + %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + ret void +} + +define amdgpu_kernel void @store_load_large_imm_offset_kernel() { +; GFX9-LABEL: store_load_large_imm_offset_kernel: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-NEXT: s_add_u32 s0, 4, 0 +; GFX9-NEXT: scratch_store_dword off, v0, s0 +; GFX9-NEXT: s_movk_i32 s0, 0x3e80 +; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: scratch_store_dword off, v0, s0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_large_imm_offset_kernel: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s0, s0, s3 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: v_mov_b32_e32 v0, 13 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: s_movk_i32 s0, 0x3e80 +; GFX10-NEXT: s_add_u32 s1, 4, 0 +; GFX10-NEXT: s_add_u32 s0, 4, s0 +; GFX10-NEXT: scratch_store_dword off, v0, s1 +; GFX10-NEXT: scratch_store_dword off, v1, s0 +; GFX10-NEXT: scratch_load_dword v0, off, s0 +; GFX10-NEXT: s_endpgm +bb: + %i = alloca [4096 x i32], align 4, addrspace(5) + %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef + store volatile i32 13, i32 addrspace(5)* %i1, align 4 + %i7 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 + store volatile i32 15, i32 addrspace(5)* %i7, align 4 + %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 + %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 + ret void +} + +define void @store_load_large_imm_offset_foo() { +; GFX9-LABEL: store_load_large_imm_offset_foo: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-NEXT: s_add_u32 s0, s32, 0 +; GFX9-NEXT: scratch_store_dword off, v0, s0 +; GFX9-NEXT: s_movk_i32 s0, 0x3e80 +; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_add_u32 s0, s32, s0 +; GFX9-NEXT: scratch_store_dword off, v0, s0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_large_imm_offset_foo: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mov_b32_e32 v0, 13 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: s_movk_i32 s0, 0x3e80 +; GFX10-NEXT: s_add_u32 s1, s32, 0 +; GFX10-NEXT: s_add_u32 s0, s32, s0 +; GFX10-NEXT: scratch_store_dword off, v0, s1 +; GFX10-NEXT: scratch_store_dword off, v1, s0 +; GFX10-NEXT: scratch_load_dword v0, off, s0 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + %i = alloca [4096 x i32], align 4, addrspace(5) + %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef + store volatile i32 13, i32 addrspace(5)* %i1, align 4 + %i7 = 
getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 + store volatile i32 15, i32 addrspace(5)* %i7, align 4 + %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 + %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 + ret void +} + +define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { +; GFX9-LABEL: store_load_vidx_sidx_offset: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_add_lshl_u32 v0, s0, v0, 2 +; GFX9-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX9-NEXT: v_add_u32_e32 v0, 0x400, v0 +; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: store_load_vidx_sidx_offset: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_lshl_u32 v0, s0, v0, 2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x400, v0 +; GFX10-NEXT: scratch_store_dword v0, v1, off +; GFX10-NEXT: scratch_load_dword v0, v0, off +; GFX10-NEXT: s_endpgm +bb: + %alloca = alloca [32 x i32], align 4, addrspace(5) + %vidx = tail call i32 @llvm.amdgcn.workitem.id.x() + %add1 = add nsw i32 %sidx, %vidx + %add2 = add nsw i32 %add1, 256 + %gep = getelementptr inbounds [32 x i32], [32 x i32] addrspace(5)* %alloca, i32 0, i32 %add2 + store volatile i32 15, i32 addrspace(5)* %gep, align 4 + %load = load volatile i32, i32 addrspace(5)* %gep, align 4 + ret void +} + +define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) { +; GFX9-LABEL: store_load_i64_aligned: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: scratch_store_dwordx2 v0, v[1:2], off +; GFX9-NEXT: scratch_load_dwordx2 v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_i64_aligned: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off +; GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + store volatile i64 15, i64 addrspace(5)* %arg, align 8 + %load = load volatile i64, i64 addrspace(5)* %arg, align 8 + ret void +} + +define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) { +; GFX9-LABEL: store_load_i64_unaligned: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: scratch_store_dwordx2 v0, v[1:2], off +; GFX9-NEXT: scratch_load_dwordx2 v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_i64_unaligned: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; 
GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off +; GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + store volatile i64 15, i64 addrspace(5)* %arg, align 1 + %load = load volatile i64, i64 addrspace(5)* %arg, align 1 + ret void +} + +define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg) { +; GFX9-LABEL: store_load_v3i32_unaligned: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s2, 3 +; GFX9-NEXT: s_mov_b32 s1, 2 +; GFX9-NEXT: s_mov_b32 s0, 1 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: scratch_store_dwordx3 v0, v[1:3], off +; GFX9-NEXT: scratch_load_dwordx3 v[0:2], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_v3i32_unaligned: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s2, 3 +; GFX10-NEXT: s_mov_b32 s1, 2 +; GFX10-NEXT: s_mov_b32 s0, 1 +; GFX10-NEXT: v_mov_b32_e32 v3, s2 +; GFX10-NEXT: v_mov_b32_e32 v2, s1 +; GFX10-NEXT: v_mov_b32_e32 v1, s0 +; GFX10-NEXT: scratch_store_dwordx3 v0, v[1:3], off +; GFX10-NEXT: scratch_load_dwordx3 v[0:2], v0, off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + store volatile <3 x i32> , <3 x i32> addrspace(5)* %arg, align 1 + %load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, align 1 + ret void +} + +define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg) { +; GFX9-LABEL: store_load_v4i32_unaligned: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s3, 4 +; GFX9-NEXT: s_mov_b32 s2, 3 +; GFX9-NEXT: s_mov_b32 s1, 2 +; GFX9-NEXT: s_mov_b32 s0, 1 +; GFX9-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: scratch_store_dwordx4 v0, v[1:4], off +; GFX9-NEXT: scratch_load_dwordx4 v[0:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: store_load_v4i32_unaligned: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s3, 4 +; GFX10-NEXT: s_mov_b32 s2, 3 +; GFX10-NEXT: s_mov_b32 s1, 2 +; GFX10-NEXT: s_mov_b32 s0, 1 +; GFX10-NEXT: v_mov_b32_e32 v4, s3 +; GFX10-NEXT: v_mov_b32_e32 v3, s2 +; GFX10-NEXT: v_mov_b32_e32 v2, s1 +; GFX10-NEXT: v_mov_b32_e32 v1, s0 +; GFX10-NEXT: scratch_store_dwordx4 v0, v[1:4], off +; GFX10-NEXT: scratch_load_dwordx4 v[0:3], v0, off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +bb: + store volatile <4 x i32> , <4 x i32> addrspace(5)* %arg, align 1 + %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() From e6b3db6309f201075dd97fdfb89297f481bcee6e Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 15 Dec 2020 15:32:32 -0800 Subject: [PATCH 140/378] scudo: Replace the Cache argument on MapAllocator with a Config argument. NFCI. This will allow the secondary allocator to access the MaySupportMemoryTagging bool. 
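For illustration, a minimal standalone sketch of the Config-as-template-parameter pattern that patches 137, 140 and 141 converge on; the names below (ExampleConfig, ExamplePrimary, ExampleSecondary) are simplified stand-ins for this note only, not the real scudo classes:

// A single config struct carries both primary- and secondary-allocator
// settings, so a shared flag such as MaySupportMemoryTagging lives in one place.
struct ExampleConfig {
  static const bool MaySupportMemoryTagging = false;
  static const unsigned PrimaryRegionSizeLog = 30U; // 1GB regions
};

// The primary allocator is parameterized on the whole config rather than on
// SizeClassMap, RegionSizeLog and the release intervals individually.
template <class Config> struct ExamplePrimary {
  static const unsigned long RegionSize = 1UL << Config::PrimaryRegionSizeLog;
};

// The secondary allocator takes the same config, so it can consult the same flag.
template <class Config> struct ExampleSecondary {
  static const bool MayTag = Config::MaySupportMemoryTagging;
};

using Primary = ExamplePrimary<ExampleConfig>;
using Secondary = ExampleSecondary<ExampleConfig>;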
Differential Revision: https://reviews.llvm.org/D93729 --- compiler-rt/lib/scudo/standalone/combined.h | 2 +- compiler-rt/lib/scudo/standalone/secondary.h | 14 ++++++------- .../scudo/standalone/tests/secondary_test.cpp | 20 +++++++++++-------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 7bf108e0b5e0e..fae71ba1b84ff 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -930,7 +930,7 @@ class Allocator { } private: - using SecondaryT = MapAllocator; + using SecondaryT = MapAllocator; typedef typename PrimaryT::SizeClassMap SizeClassMap; static const uptr MinAlignmentLog = SCUDO_MIN_ALIGNMENT_LOG; diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index cccbeb239daea..063640106abb3 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -245,7 +245,7 @@ template class MapAllocatorCache { atomic_s32 ReleaseToOsIntervalMs; }; -template class MapAllocator { +template class MapAllocator { public: void initLinkerInitialized(GlobalStats *S, s32 ReleaseToOsInterval = -1) { Cache.initLinkerInitialized(ReleaseToOsInterval); @@ -295,7 +295,7 @@ template class MapAllocator { void releaseToOS() { Cache.releaseToOS(); } private: - CacheT Cache; + typename Config::SecondaryCache Cache; HybridMutex Mutex; DoublyLinkedList InUseBlocks; @@ -318,8 +318,8 @@ template class MapAllocator { // For allocations requested with an alignment greater than or equal to a page, // the committed memory will amount to something close to Size - AlignmentHint // (pending rounding and headers). -template -void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, +template +void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, uptr *BlockEnd, FillContentsMode FillContents) { DCHECK_GE(Size, AlignmentHint); @@ -410,7 +410,7 @@ void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, return reinterpret_cast(Ptr + LargeBlock::getHeaderSize()); } -template void MapAllocator::deallocate(void *Ptr) { +template void MapAllocator::deallocate(void *Ptr) { LargeBlock::Header *H = LargeBlock::getHeader(Ptr); const uptr Block = reinterpret_cast(H); const uptr CommitSize = H->BlockEnd - Block; @@ -430,8 +430,8 @@ template void MapAllocator::deallocate(void *Ptr) { unmap(Addr, Size, UNMAP_ALL, &Data); } -template -void MapAllocator::getStats(ScopedString *Str) const { +template +void MapAllocator::getStats(ScopedString *Str) const { Str->append( "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times " "(%zuK), remains %zu (%zuK) max %zuM\n", diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 3c1e77987ec46..846ec8f6d6faa 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -19,7 +19,9 @@ #include #include -template static void testSecondaryBasic(void) { +template static void testSecondaryBasic(void) { + using SecondaryT = scudo::MapAllocator; + scudo::GlobalStats S; S.init(); std::unique_ptr L(new SecondaryT); @@ -55,7 +57,12 @@ template static void testSecondaryBasic(void) { Str.output(); } +struct NoCacheConfig { + typedef scudo::MapAllocatorNoCache SecondaryCache; +}; + struct TestConfig { + typedef scudo::MapAllocatorCache SecondaryCache; static const scudo::u32 
SecondaryCacheEntriesArraySize = 128U; static const scudo::u32 SecondaryCacheDefaultMaxEntriesCount = 64U; static const scudo::uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 20; @@ -64,15 +71,12 @@ struct TestConfig { }; TEST(ScudoSecondaryTest, SecondaryBasic) { - testSecondaryBasic>(); - testSecondaryBasic< - scudo::MapAllocator>>(); - testSecondaryBasic< - scudo::MapAllocator>>(); + testSecondaryBasic(); + testSecondaryBasic(); + testSecondaryBasic(); } -using LargeAllocator = - scudo::MapAllocator>; +using LargeAllocator = scudo::MapAllocator; // This exercises a variety of combinations of size and alignment for the // MapAllocator. The size computation done here mimic the ones done by the From faac1c02c802048efa17f8f6cda8f39b5584f0c6 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 22 Dec 2020 11:48:53 -0800 Subject: [PATCH 141/378] scudo: Move the management of the UseMemoryTagging bit out of the Primary. NFCI. The primary and secondary allocators will need to share this bit, so move the management of the bit to the combined allocator and make useMemoryTagging() a free function. Differential Revision: https://reviews.llvm.org/D93730 --- compiler-rt/lib/scudo/standalone/combined.h | 30 ++++++++++--------- compiler-rt/lib/scudo/standalone/options.h | 6 ++++ compiler-rt/lib/scudo/standalone/primary32.h | 4 --- compiler-rt/lib/scudo/standalone/primary64.h | 12 +------- .../scudo/standalone/tests/combined_test.cpp | 8 ++--- 5 files changed, 27 insertions(+), 33 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index fae71ba1b84ff..911c49dcc3fd7 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -100,7 +100,7 @@ class Allocator { // Reset tag to 0 as this chunk may have been previously used for a tagged // user allocation. - if (UNLIKELY(Allocator.useMemoryTagging())) + if (UNLIKELY(useMemoryTagging(Allocator.Primary.Options.load()))) storeTags(reinterpret_cast(Ptr), reinterpret_cast(Ptr) + sizeof(QuarantineBatch)); @@ -161,6 +161,9 @@ class Allocator { Primary.Options.set(OptionBit::DeallocTypeMismatch); if (getFlags()->delete_size_mismatch) Primary.Options.set(OptionBit::DeleteSizeMismatch); + if (allocatorSupportsMemoryTagging() && + systemSupportsMemoryTagging()) + Primary.Options.set(OptionBit::UseMemoryTagging); Primary.Options.set(OptionBit::UseOddEvenTags); QuarantineMaxChunkSize = @@ -240,7 +243,7 @@ class Allocator { } ALWAYS_INLINE void *untagPointerMaybe(void *Ptr) { - if (Primary.SupportsMemoryTagging) + if (allocatorSupportsMemoryTagging()) return reinterpret_cast( untagPointer(reinterpret_cast(Ptr))); return Ptr; @@ -367,7 +370,7 @@ class Allocator { // // When memory tagging is enabled, zeroing the contents is done as part of // setting the tag. 
- if (UNLIKELY(useMemoryTagging(Options))) { + if (UNLIKELY(useMemoryTagging(Options))) { uptr PrevUserPtr; Chunk::UnpackedHeader Header; const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId); @@ -594,7 +597,7 @@ class Allocator { : BlockEnd - (reinterpret_cast(OldPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - if (UNLIKELY(ClassId && useMemoryTagging(Options))) { + if (UNLIKELY(ClassId && useMemoryTagging(Options))) { resizeTaggedChunk(reinterpret_cast(OldTaggedPtr) + OldSize, reinterpret_cast(OldTaggedPtr) + NewSize, BlockEnd); @@ -692,7 +695,7 @@ class Allocator { if (getChunkFromBlock(Block, &Chunk, &Header) && Header.State == Chunk::State::Allocated) { uptr TaggedChunk = Chunk; - if (useMemoryTagging(Primary.Options.load())) + if (useMemoryTagging(Primary.Options.load())) TaggedChunk = loadTag(Chunk); Callback(TaggedChunk, getSize(reinterpret_cast(Chunk), &Header), Arg); @@ -783,15 +786,14 @@ class Allocator { Header.State == Chunk::State::Allocated; } - bool useMemoryTagging() const { - return useMemoryTagging(Primary.Options.load()); + bool useMemoryTaggingTestOnly() const { + return useMemoryTagging(Primary.Options.load()); } - static bool useMemoryTagging(Options Options) { - return PrimaryT::useMemoryTagging(Options); + void disableMemoryTagging() { + if (allocatorSupportsMemoryTagging()) + Primary.Options.clear(OptionBit::UseMemoryTagging); } - void disableMemoryTagging() { Primary.disableMemoryTagging(); } - void setTrackAllocationStacks(bool Track) { initThreadMaybe(); if (Track) @@ -823,7 +825,7 @@ class Allocator { const char *MemoryTags, uintptr_t MemoryAddr, size_t MemorySize) { *ErrorInfo = {}; - if (!PrimaryT::SupportsMemoryTagging || + if (!allocatorSupportsMemoryTagging() || MemoryAddr + MemorySize < MemoryAddr) return; @@ -942,7 +944,7 @@ class Allocator { static_assert(MinAlignment >= sizeof(Chunk::PackedHeader), "Minimal alignment must at least cover a chunk header."); - static_assert(!PrimaryT::SupportsMemoryTagging || + static_assert(!allocatorSupportsMemoryTagging() || MinAlignment >= archMemoryTagGranuleSize(), ""); @@ -1037,7 +1039,7 @@ class Allocator { void quarantineOrDeallocateChunk(Options Options, void *Ptr, Chunk::UnpackedHeader *Header, uptr Size) { Chunk::UnpackedHeader NewHeader = *Header; - if (UNLIKELY(NewHeader.ClassId && useMemoryTagging(Options))) { + if (UNLIKELY(NewHeader.ClassId && useMemoryTagging(Options))) { u8 PrevTag = extractTag(loadTag(reinterpret_cast(Ptr))); if (!TSDRegistry.getDisableMemInit()) { uptr TaggedBegin, TaggedEnd; diff --git a/compiler-rt/lib/scudo/standalone/options.h b/compiler-rt/lib/scudo/standalone/options.h index 2cffc4d75c38c..91301bf5ec9c2 100644 --- a/compiler-rt/lib/scudo/standalone/options.h +++ b/compiler-rt/lib/scudo/standalone/options.h @@ -11,6 +11,7 @@ #include "atomic_helpers.h" #include "common.h" +#include "memtag.h" namespace scudo { @@ -36,6 +37,11 @@ struct Options { } }; +template bool useMemoryTagging(Options Options) { + return allocatorSupportsMemoryTagging() && + Options.get(OptionBit::UseMemoryTagging); +} + struct AtomicOptions { atomic_u32 Val; diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index c744670b43926..a88a2a67e9515 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -50,7 +50,6 @@ template class SizeClassAllocator32 { typedef SizeClassAllocator32 ThisT; typedef SizeClassAllocatorLocalCache 
CacheT; typedef typename CacheT::TransferBatch TransferBatch; - static const bool SupportsMemoryTagging = false; static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -216,9 +215,6 @@ template class SizeClassAllocator32 { return TotalReleasedBytes; } - static bool useMemoryTagging(UNUSED Options Options) { return false; } - void disableMemoryTagging() {} - const char *getRegionInfoArrayAddress() const { return nullptr; } static uptr getRegionInfoArraySize() { return 0; } diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index df1310aa8e959..2724a2529f759 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -46,8 +46,6 @@ template class SizeClassAllocator64 { typedef SizeClassAllocator64 ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; - static const bool SupportsMemoryTagging = - allocatorSupportsMemoryTagging(); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -76,9 +74,6 @@ template class SizeClassAllocator64 { Region->ReleaseInfo.LastReleaseAtNs = Time; } setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); - - if (SupportsMemoryTagging && systemSupportsMemoryTagging()) - Options.set(OptionBit::UseMemoryTagging); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); @@ -193,11 +188,6 @@ template class SizeClassAllocator64 { return TotalReleasedBytes; } - static bool useMemoryTagging(Options Options) { - return SupportsMemoryTagging && Options.get(OptionBit::UseMemoryTagging); - } - void disableMemoryTagging() { Options.clear(OptionBit::UseMemoryTagging); } - const char *getRegionInfoArrayAddress() const { return reinterpret_cast(RegionInfoArray); } @@ -335,7 +325,7 @@ template class SizeClassAllocator64 { if (!map(reinterpret_cast(RegionBeg + MappedUser), UserMapSize, "scudo:primary", MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging(Options.load()) ? MAP_MEMTAG : 0), + (useMemoryTagging(Options.load()) ? MAP_MEMTAG : 0), &Region->Data)) return nullptr; Region->MappedUser += UserMapSize; diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index 53874933e7ac3..7bb6725d3a520 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -47,7 +47,7 @@ bool isPrimaryAllocation(scudo::uptr Size, scudo::uptr Alignment) { template bool isTaggedAllocation(AllocatorT *Allocator, scudo::uptr Size, scudo::uptr Alignment) { - return Allocator->useMemoryTagging() && + return Allocator->useMemoryTaggingTestOnly() && scudo::systemDetectsMemoryTagFaultsTestOnly() && isPrimaryAllocation(Size, Alignment); } @@ -162,7 +162,7 @@ template static void testAllocator() { for (scudo::uptr I = 0; I < Size; I++) { unsigned char V = (reinterpret_cast(P))[I]; if (isPrimaryAllocation(Size, 1U << MinAlignLog) && - !Allocator->useMemoryTagging()) + !Allocator->useMemoryTaggingTestOnly()) ASSERT_EQ(V, scudo::PatternFillByte); else ASSERT_TRUE(V == scudo::PatternFillByte || V == 0); @@ -248,7 +248,7 @@ template static void testAllocator() { Allocator->releaseToOS(); - if (Allocator->useMemoryTagging() && + if (Allocator->useMemoryTaggingTestOnly() && scudo::systemDetectsMemoryTagFaultsTestOnly()) { // Check that use-after-free is detected. 
for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { @@ -493,7 +493,7 @@ TEST(ScudoCombinedTest, OddEven) { using SizeClassMap = AllocatorT::PrimaryT::SizeClassMap; auto Allocator = std::unique_ptr(new AllocatorT()); - if (!Allocator->useMemoryTagging()) + if (!Allocator->useMemoryTaggingTestOnly()) return; auto CheckOddEven = [](scudo::uptr P1, scudo::uptr P2) { From 22cf54a7fba670642c121684ac3c7ff7e35dfa5c Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sun, 22 Mar 2020 22:20:04 -0400 Subject: [PATCH 142/378] Replace `T(x)` with `reinterpret_cast(x)` everywhere it means reinterpret_cast. NFC. Differential Revision: https://reviews.llvm.org/D76572 --- clang/lib/CodeGen/CGCall.h | 6 +++-- llvm/include/llvm/IR/SymbolTableListTraits.h | 8 ++++--- llvm/include/llvm/Object/Binary.h | 4 ++-- .../lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 7 +++--- llvm/lib/Object/COFFObjectFile.cpp | 24 +++++++++++-------- llvm/lib/Object/ELFObjectFile.cpp | 3 ++- llvm/lib/Object/XCOFFObjectFile.cpp | 4 ++-- llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp | 9 +++---- llvm/tools/llvm-readobj/ELFDumper.cpp | 12 ++++++---- 9 files changed, 45 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index 509ca43a97848..e3d9fec6d3634 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -110,7 +110,8 @@ class CGCallee { /// Construct a callee. Call this constructor directly when this /// isn't a direct call. CGCallee(const CGCalleeInfo &abstractInfo, llvm::Value *functionPtr) - : KindOrFunctionPointer(SpecialKind(uintptr_t(functionPtr))) { + : KindOrFunctionPointer( + SpecialKind(reinterpret_cast(functionPtr))) { AbstractInfo = abstractInfo; assert(functionPtr && "configuring callee without function pointer"); assert(functionPtr->getType()->isPointerTy()); @@ -186,7 +187,8 @@ class CGCallee { } void setFunctionPointer(llvm::Value *functionPtr) { assert(isOrdinary()); - KindOrFunctionPointer = SpecialKind(uintptr_t(functionPtr)); + KindOrFunctionPointer = + SpecialKind(reinterpret_cast(functionPtr)); } bool isVirtual() const { diff --git a/llvm/include/llvm/IR/SymbolTableListTraits.h b/llvm/include/llvm/IR/SymbolTableListTraits.h index 5b793e5dbf283..8af712374bfaf 100644 --- a/llvm/include/llvm/IR/SymbolTableListTraits.h +++ b/llvm/include/llvm/IR/SymbolTableListTraits.h @@ -76,9 +76,11 @@ class SymbolTableListTraits : public ilist_alloc_traits { /// getListOwner - Return the object that owns this list. If this is a list /// of instructions, it returns the BasicBlock that owns them. 
ItemParentClass *getListOwner() { - size_t Offset(size_t(&((ItemParentClass*)nullptr->*ItemParentClass:: - getSublistAccess(static_cast(nullptr))))); - ListTy *Anchor(static_cast(this)); + size_t Offset = reinterpret_cast( + &((ItemParentClass *)nullptr->*ItemParentClass::getSublistAccess( + static_cast( + nullptr)))); + ListTy *Anchor = static_cast(this); return reinterpret_cast(reinterpret_cast(Anchor)- Offset); } diff --git a/llvm/include/llvm/Object/Binary.h b/llvm/include/llvm/Object/Binary.h index e12e512d68b8f..dd98e1143e259 100644 --- a/llvm/include/llvm/Object/Binary.h +++ b/llvm/include/llvm/Object/Binary.h @@ -165,8 +165,8 @@ class Binary { static Error checkOffset(MemoryBufferRef M, uintptr_t Addr, const uint64_t Size) { if (Addr + Size < Addr || Addr + Size < Size || - Addr + Size > uintptr_t(M.getBufferEnd()) || - Addr < uintptr_t(M.getBufferStart())) { + Addr + Size > reinterpret_cast(M.getBufferEnd()) || + Addr < reinterpret_cast(M.getBufferStart())) { return errorCodeToError(object_error::unexpected_eof); } return Error::success(); diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8fa83f5159109..354b638b47a2c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -145,9 +145,10 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! " "Frame size " + - Twine(FrameSize) + ">= 65536.\n" - "(" + - Twine(uintptr_t(&FI)) + ")"); + Twine(FrameSize) + + ">= 65536.\n" + "(" + + Twine(reinterpret_cast(&FI)) + ")"); } AP.OutStreamer->AddComment("live roots for " + diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index c0902597fadf3..2e44a38ccdaaa 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -57,7 +57,7 @@ static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Size) { template static Error getObject(const T *&Obj, MemoryBufferRef M, const void *Ptr, const uint64_t Size = sizeof(T)) { - uintptr_t Addr = uintptr_t(Ptr); + uintptr_t Addr = reinterpret_cast(Ptr); if (Error E = Binary::checkOffset(M, Addr, Size)) return E; Obj = reinterpret_cast(Addr); @@ -103,10 +103,11 @@ const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const { const coff_symbol_type *Addr = reinterpret_cast(Ref.p); - assert(!checkOffset(Data, uintptr_t(Addr), sizeof(*Addr))); + assert(!checkOffset(Data, reinterpret_cast(Addr), sizeof(*Addr))); #ifndef NDEBUG // Verify that the symbol points to a valid entry in the symbol table. 
- uintptr_t Offset = uintptr_t(Addr) - uintptr_t(base()); + uintptr_t Offset = + reinterpret_cast(Addr) - reinterpret_cast(base()); assert((Offset - getPointerToSymbolTable()) % sizeof(coff_symbol_type) == 0 && "Symbol did not point to the beginning of a symbol"); @@ -123,7 +124,8 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections())) report_fatal_error("Section was outside of section table."); - uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable); + uintptr_t Offset = reinterpret_cast(Addr) - + reinterpret_cast(SectionTable); assert(Offset % sizeof(coff_section) == 0 && "Section did not point to the beginning of a section"); #endif @@ -332,7 +334,7 @@ bool COFFObjectFile::isDebugSection(StringRef SectionName) const { unsigned COFFObjectFile::getSectionID(SectionRef Sec) const { uintptr_t Offset = - uintptr_t(Sec.getRawDataRefImpl().p) - uintptr_t(SectionTable); + Sec.getRawDataRefImpl().p - reinterpret_cast(SectionTable); assert((Offset % sizeof(coff_section)) == 0); return (Offset / sizeof(coff_section)) + 1; } @@ -376,7 +378,7 @@ getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) { // relocations. begin++; } - if (auto E = Binary::checkOffset(M, uintptr_t(begin), + if (auto E = Binary::checkOffset(M, reinterpret_cast(begin), sizeof(coff_relocation) * NumRelocs)) { consumeError(std::move(E)); return nullptr; @@ -467,7 +469,8 @@ Error COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize; if (SectionStart <= Addr && Addr < SectionEnd) { uint32_t Offset = Addr - SectionStart; - Res = uintptr_t(base()) + Section->PointerToRawData + Offset; + Res = reinterpret_cast(base()) + Section->PointerToRawData + + Offset; return Error::success(); } } @@ -484,8 +487,8 @@ Error COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size, uint32_t OffsetIntoSection = RVA - SectionStart; if (SectionStart <= RVA && OffsetIntoSection < Section->VirtualSize && Size <= Section->VirtualSize - OffsetIntoSection) { - uintptr_t Begin = - uintptr_t(base()) + Section->PointerToRawData + OffsetIntoSection; + uintptr_t Begin = reinterpret_cast(base()) + + Section->PointerToRawData + OffsetIntoSection; Contents = ArrayRef(reinterpret_cast(Begin), Size); return Error::success(); @@ -1127,7 +1130,8 @@ Error COFFObjectFile::getSectionContents(const coff_section *Sec, // The only thing that we need to verify is that the contents is contained // within the file bounds. We don't need to make sure it doesn't cover other // data, as there's nothing that says that is not allowed. 
- uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData; + uintptr_t ConStart = + reinterpret_cast(base()) + Sec->PointerToRawData; uint32_t SectionSize = getSectionSize(Sec); if (Error E = checkOffset(Data, ConStart, SectionSize)) return E; diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index c654c3fd3d6c1..91871a6255dc8 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -73,7 +73,8 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj, bool InitContent) { std::pair Ident = getElfArchType(Obj.getBuffer()); std::size_t MaxAlignment = - 1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart())); + 1ULL << countTrailingZeros( + reinterpret_cast(Obj.getBufferStart())); if (MaxAlignment < 2) return createError("Insufficient alignment"); diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp index fee468584f36a..b0d772b6ff212 100644 --- a/llvm/lib/Object/XCOFFObjectFile.cpp +++ b/llvm/lib/Object/XCOFFObjectFile.cpp @@ -31,7 +31,7 @@ static const uint16_t NoRelMask = 0x0001; template static Expected getObject(MemoryBufferRef M, const void *Ptr, const uint64_t Size = sizeof(T)) { - uintptr_t Addr = uintptr_t(Ptr); + uintptr_t Addr = reinterpret_cast(Ptr); if (Error E = Binary::checkOffset(M, Addr, Size)) return std::move(E); return reinterpret_cast(Addr); @@ -283,7 +283,7 @@ XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const { const uint8_t * ContentStart = base() + OffsetToRaw; uint64_t SectionSize = getSectionSize(Sec); - if (checkOffset(Data, uintptr_t(ContentStart), SectionSize)) + if (checkOffset(Data, reinterpret_cast(ContentStart), SectionSize)) return make_error(); return makeArrayRef(ContentStart,SectionSize); diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index a8e03a653e86a..04cd432f56c5c 100644 --- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -472,10 +472,11 @@ static const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { // determining equality. The only purpose of the ordering is to eliminate // duplication due to the commutativity of equality/non-equality. 
static NodePair node_pair(GepNode *N1, GepNode *N2) { - uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2); - if (P1 <= P2) - return std::make_pair(N1, N2); - return std::make_pair(N2, N1); + uintptr_t P1 = reinterpret_cast(N1); + uintptr_t P2 = reinterpret_cast(N2); + if (P1 <= P2) + return std::make_pair(N1, N2); + return std::make_pair(N2, N1); } static unsigned node_hash(GepNode *N) { diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 178cca87f2d4a..a82494ad1b4d8 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -505,7 +505,9 @@ Expected> ELFDumper::getVersionTable(const Elf_Shdr &Sec, ArrayRef *SymTab, StringRef *StrTab) const { assert((!SymTab && !StrTab) || (SymTab && StrTab)); - if (uintptr_t(Obj.base() + Sec.sh_offset) % sizeof(uint16_t) != 0) + if (reinterpret_cast(Obj.base() + Sec.sh_offset) % + sizeof(uint16_t) != + 0) return createError("the " + describe(Sec) + " is misaligned"); Expected> VersionsOrErr = @@ -576,7 +578,7 @@ ELFDumper::getVersionDefinitions(const Elf_Shdr &Sec) const { return createError("invalid " + describe(Sec) + ": version definition " + Twine(I) + " goes past the end of the section"); - if (uintptr_t(VerdefBuf) % sizeof(uint32_t) != 0) + if (reinterpret_cast(VerdefBuf) % sizeof(uint32_t) != 0) return createError( "invalid " + describe(Sec) + ": found a misaligned version definition entry at offset 0x" + @@ -598,7 +600,7 @@ ELFDumper::getVersionDefinitions(const Elf_Shdr &Sec) const { const uint8_t *VerdauxBuf = VerdefBuf + D->vd_aux; for (unsigned J = 0; J < D->vd_cnt; ++J) { - if (uintptr_t(VerdauxBuf) % sizeof(uint32_t) != 0) + if (reinterpret_cast(VerdauxBuf) % sizeof(uint32_t) != 0) return createError("invalid " + describe(Sec) + ": found a misaligned auxiliary entry at offset 0x" + Twine::utohexstr(VerdauxBuf - Start)); @@ -644,7 +646,7 @@ ELFDumper::getVersionDependencies(const Elf_Shdr &Sec) const { return createError("invalid " + describe(Sec) + ": version dependency " + Twine(I) + " goes past the end of the section"); - if (uintptr_t(VerneedBuf) % sizeof(uint32_t) != 0) + if (reinterpret_cast(VerneedBuf) % sizeof(uint32_t) != 0) return createError( "invalid " + describe(Sec) + ": found a misaligned version dependency entry at offset 0x" + @@ -670,7 +672,7 @@ ELFDumper::getVersionDependencies(const Elf_Shdr &Sec) const { const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; for (unsigned J = 0; J < Verneed->vn_cnt; ++J) { - if (uintptr_t(VernauxBuf) % sizeof(uint32_t) != 0) + if (reinterpret_cast(VernauxBuf) % sizeof(uint32_t) != 0) return createError("invalid " + describe(Sec) + ": found a misaligned auxiliary entry at offset 0x" + Twine::utohexstr(VernauxBuf - Start)); From 5bec0828347893544ab863ddf4caa2f0b5ef79dd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 17 Dec 2020 18:47:13 -0500 Subject: [PATCH 143/378] VirtRegMap: Use Register --- llvm/include/llvm/CodeGen/VirtRegMap.h | 8 ++++---- llvm/lib/CodeGen/LiveRangeEdit.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index 1775afb51bdf2..deef4b90279a0 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -151,7 +151,7 @@ class TargetInstrInfo; bool hasKnownPreference(Register VirtReg); /// records virtReg is a split live interval from SReg. 
- void setIsSplitFromReg(Register virtReg, unsigned SReg) { + void setIsSplitFromReg(Register virtReg, Register SReg) { Virt2SplitMap[virtReg.id()] = SReg; if (hasShape(SReg)) { Virt2ShapeMap[virtReg.id()] = getShape(SReg); @@ -159,7 +159,7 @@ class TargetInstrInfo; } /// returns the live interval virtReg is split from. - unsigned getPreSplitReg(Register virtReg) const { + Register getPreSplitReg(Register virtReg) const { return Virt2SplitMap[virtReg.id()]; } @@ -167,8 +167,8 @@ class TargetInstrInfo; /// from through splitting. /// A register that was not created by splitting is its own original. /// This operation is idempotent. - unsigned getOriginal(unsigned VirtReg) const { - unsigned Orig = getPreSplitReg(VirtReg); + Register getOriginal(Register VirtReg) const { + Register Orig = getPreSplitReg(VirtReg); return Orig ? Orig : VirtReg; } diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index dbf02802d4735..037cb54262356 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -435,7 +435,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, if (!SplitLIs.empty()) ++NumFracRanges; - unsigned Original = VRM ? VRM->getOriginal(VReg) : 0; + Register Original = VRM ? VRM->getOriginal(VReg) : Register(); for (const LiveInterval *SplitLI : SplitLIs) { // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original From 29ed846d671117b9a635767dac43cb19fb5ce11f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 21 Dec 2020 13:27:32 -0500 Subject: [PATCH 144/378] AMDGPU: Fix assert when checking for implicit operand legality --- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 8 ++++++-- llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 75a54c0a412e9..d6c151d3d2cc2 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -213,8 +213,12 @@ static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, if (UseMI == &MI) continue; if (MO.isDef() || UseMI->getParent() != MI.getParent() || - UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END || - !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src)) + UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) + return false; + + unsigned OpIdx = UseMI->getOperandNo(&MO); + if (OpIdx >= UseMI->getDesc().getNumOperands() || + !TII->isOperandLegal(*UseMI, OpIdx, &Src)) return false; } // Change VGPR to SGPR destination. diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir index e96f2839ab029..6c438d80e5327 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir @@ -72,3 +72,19 @@ body: | %1:sreg_32_xm0 = COPY %0 S_ENDPGM 0, implicit %1 ... + +# Make sure there's no assert when looking at the implicit use on S_ENDPGM +# GCN-LABEL: name: s_to_v_copy_implicit_use +# GCN: %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %1:sreg_64, 0, 0, 0 :: (load 4, addrspace 4) +# GCN-NEXT: %2:vgpr_32 = COPY %0 +# GCN-NEXT: S_ENDPGM 0, implicit %2 +--- +name: s_to_v_copy_implicit_use +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %2:sreg_64, 0, 0, 0 :: (load 4, addrspace 4) + %1:vgpr_32 = COPY %0 + S_ENDPGM 0, implicit %1 + +... 
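The assert fixed above fires because implicit uses (such as the one on S_ENDPGM in the new MIR
test) are appended after the operands declared in the instruction description, so their index is
out of range for the legality query. A minimal sketch of the guard pattern, using hypothetical
stand-in types rather than the real MachineInstr/MCInstrDesc/SIInstrInfo API:

  #include <functional>

  struct FakeDesc {
    unsigned NumOperands;
    unsigned getNumOperands() const { return NumOperands; }
  };
  struct FakeInstr {
    FakeDesc Desc;
    const FakeDesc &getDesc() const { return Desc; }
  };

  // Conservatively refuse the VGPR->SGPR rewrite for implicit operands
  // instead of handing an out-of-range index to the legality check.
  bool useIsLegalAfterRewrite(
      const FakeInstr &UseMI, unsigned OpIdx,
      const std::function<bool(const FakeInstr &, unsigned)> &IsOperandLegal) {
    if (OpIdx >= UseMI.getDesc().getNumOperands())
      return false; // implicit operand: index is past the declared operand list
    return IsOperandLegal(UseMI, OpIdx);
  }
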
From c8874464b5f63e300bb7507340c04801e033c02e Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Sun, 20 Dec 2020 18:07:42 -0800 Subject: [PATCH 145/378] [RISCV] Add intrinsics for vslide1up/down, vfslide1up/down instruction This patch adds intrinsics for vslide1up, vslide1down, vfslide1up, vfslide1down. Authored-by: Roger Ferrer Ibanez Co-Authored-by: ShihPo Hung Differential Revision: https://reviews.llvm.org/D93608 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 6 +- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 21 +- .../CodeGen/RISCV/rvv/vfslide1down-rv32.ll | 512 +++++++++ .../CodeGen/RISCV/rvv/vfslide1down-rv64.ll | 698 ++++++++++++ .../test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll | 523 +++++++++ .../test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll | 713 ++++++++++++ .../CodeGen/RISCV/rvv/vslide1down-rv32.ll | 800 +++++++++++++ .../CodeGen/RISCV/rvv/vslide1down-rv64.ll | 978 ++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll | 24 + llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll | 1000 +++++++++++++++++ 10 files changed, 5270 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index efb91c0ab0596..f2aed28440ccb 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -535,6 +535,11 @@ let TargetPrefix = "riscv" in { defm vslideup : RISCVTernaryAAAX; defm vslidedown : RISCVTernaryAAAX; + defm vslide1up : RISCVBinaryAAX; + defm vslide1down : RISCVBinaryAAX; + defm vfslide1up : RISCVBinaryAAX; + defm vfslide1down : RISCVBinaryAAX; + defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; defm vasubu : RISCVSaturatingBinaryAAX; @@ -554,5 +559,4 @@ let TargetPrefix = "riscv" in { defm vmfle : RISCVCompare; defm vmfgt : RISCVCompare; defm vmfge : RISCVCompare; - } // TargetPrefix = "riscv" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index c28bd30936438..7a3e33d6d7db6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -673,10 +673,10 @@ multiclass VPseudoBinaryV_VV { defm _VV : VPseudoBinary; } -multiclass VPseudoBinaryV_VX { +multiclass VPseudoBinaryV_VX { foreach m = MxList.m in defm !if(IsFloat, "_VF", "_VX") : VPseudoBinary; + !if(IsFloat, FPR32, GPR), m, Constraint>; } multiclass VPseudoBinaryV_VI { @@ -1954,8 +1954,17 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1, //===----------------------------------------------------------------------===// // 17.3. 
Vector Slide Instructions //===----------------------------------------------------------------------===// -defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI; -defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI; +let Predicates = [HasStdExtV] in { + defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI; + defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI; + defm PseudoVSLIDE1UP : VPseudoBinaryV_VX; + defm PseudoVSLIDE1DOWN : VPseudoBinaryV_VX; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { + defm PseudoVFSLIDE1UP : VPseudoBinaryV_VX; + defm PseudoVFSLIDE1DOWN : VPseudoBinaryV_VX; +} // Predicates = [HasStdExtV, HasStdExtF] //===----------------------------------------------------------------------===// // Patterns. @@ -2323,9 +2332,13 @@ foreach fvti = AllFloatVectors in { let Predicates = [HasStdExtV] in { defm "" : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllIntegerVectors, uimm5>; defm "" : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllIntegerVectors, uimm5>; + defm "" : VPatBinaryV_VX<"int_riscv_vslide1up", "PseudoVSLIDE1UP", AllIntegerVectors>; + defm "" : VPatBinaryV_VX<"int_riscv_vslide1down", "PseudoVSLIDE1DOWN", AllIntegerVectors>; } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { defm "" : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllFloatVectors, uimm5>; defm "" : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllFloatVectors, uimm5>; + defm "" : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP", AllFloatVectors>; + defm "" : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN", AllFloatVectors>; } // Predicates = [HasStdExtV, HasStdExtF] diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll new file mode 100644 index 0000000000000..c1e49e56602d1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll @@ -0,0 +1,512 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfslide1down.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, 
e16,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { 
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv32f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv32f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv32f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: fmv.h.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfslide1down.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv16f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv16f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv16f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll new file mode 100644 index 0000000000000..0344da54fe43a --- /dev/null 
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll @@ -0,0 +1,698 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfslide1down.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv8f16.f16( + , + half, + i64); + +define 
@intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv32f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv32f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv32f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: fmv.h.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, 
v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv16f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv16f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv16f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfslide1down.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv8f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v16, ft0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.nxv8f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1down.mask.nxv8f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: fmv.d.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: vfslide1down.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1down.mask.nxv8f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll new file mode 100644 index 0000000000000..ce849b6d286ee --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll @@ -0,0 +1,523 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfslide1up.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v26, v16, ft0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv16f16.f16( + 
, + half, + i32); + +define @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v28, v16, ft0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv32f16.f16( + , + half, + i32); + +define @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v8, v16, ft0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv32f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv32f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: fmv.h.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: 
vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v26, v16, ft0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v28, v16, ft0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv16f32.f32( + , + float, + i32); + +define @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v8, v16, ft0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv16f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv16f32.f32( + , + , + float, + , + i32); + +define 
@intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll new file mode 100644 index 0000000000000..765e186116ae6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll @@ -0,0 +1,713 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfslide1up.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; 
CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v26, v16, ft0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v28, v16, ft0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv32f16.f16( + , + half, + i64); + +define @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v8, v16, ft0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv32f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv32f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: fmv.h.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v26, v16, ft0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = 
call @llvm.riscv.vfslide1up.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v28, v16, ft0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv16f32.f32( + , + float, + i64); + +define @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v8, v16, ft0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv16f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv16f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v25, v16, ft0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v17, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64( %0, double %1, 
i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v26, v16, ft0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v18, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v28, v16, ft0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v20, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv8f64.f64( + , + double, + i64); + +define @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v8, v16, ft0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.nxv8f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfslide1up.mask.nxv8f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: fmv.d.x ft0, a1 +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: vfslide1up.vf v16, v8, ft0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfslide1up.mask.nxv8f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll new file mode 100644 index 0000000000000..1ff3600d7c9ad --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll @@ -0,0 +1,800 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 
-mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i8> @llvm.riscv.vslide1down.nxv1i8.i8(
+ <vscale x 1 x i8>,
+ i8,
+ i32);
+
+define <vscale x 1 x i8> @intrinsic_vslide1down_vx_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, i8 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i8_nxv1i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v16, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vslide1down.nxv1i8.i8(
+ <vscale x 1 x i8> %0,
+ i8 %1,
+ i32 %2)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vslide1down.mask.nxv1i8.i8(
+ <vscale x 1 x i8>,
+ <vscale x 1 x i8>,
+ i8,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i8> @intrinsic_vslide1down_mask_vx_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i8 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i8_nxv1i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vslide1down.mask.nxv1i8.i8(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x i8> %1,
+ i8 %2,
+ <vscale x 1 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vslide1down.nxv2i8.i8(
+ <vscale x 2 x i8>,
+ i8,
+ i32);
+
+define <vscale x 2 x i8> @intrinsic_vslide1down_vx_nxv2i8_nxv2i8_i8(<vscale x 2 x i8> %0, i8 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i8_nxv2i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v16, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vslide1down.nxv2i8.i8(
+ <vscale x 2 x i8> %0,
+ i8 %1,
+ i32 %2)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vslide1down.mask.nxv2i8.i8(
+ <vscale x 2 x i8>,
+ <vscale x 2 x i8>,
+ i8,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i8> @intrinsic_vslide1down_mask_vx_nxv2i8_nxv2i8_i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, i8 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i8_nxv2i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vslide1down.mask.nxv2i8.i8(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x i8> %1,
+ i8 %2,
+ <vscale x 2 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vslide1down.nxv4i8.i8(
+ <vscale x 4 x i8>,
+ i8,
+ i32);
+
+define <vscale x 4 x i8> @intrinsic_vslide1down_vx_nxv4i8_nxv4i8_i8(<vscale x 4 x i8> %0, i8 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i8_nxv4i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v16, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vslide1down.nxv4i8.i8(
+ <vscale x 4 x i8> %0,
+ i8 %1,
+ i32 %2)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vslide1down.mask.nxv4i8.i8(
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ i8,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i8> @intrinsic_vslide1down_mask_vx_nxv4i8_nxv4i8_i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, i8 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i8_nxv4i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vslide1down.mask.nxv4i8.i8(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x i8> %1,
+ i8 %2,
+ <vscale x 4 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vslide1down.nxv8i8.i8(
+ <vscale x 8 x i8>,
+ i8,
+ i32);
+
+define <vscale x 8 x i8> @intrinsic_vslide1down_vx_nxv8i8_nxv8i8_i8(<vscale x 8 x i8> %0, i8 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i8_nxv8i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v16, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vslide1down.nxv8i8.i8(
+ <vscale x 8 x i8> %0,
+ i8 %1,
+ i32 %2)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vslide1down.mask.nxv8i8.i8(
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ i8,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i8> @intrinsic_vslide1down_mask_vx_nxv8i8_nxv8i8_i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, i8 %2, <vscale x 8 x i1> %3, i32 %4)
nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i8_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv16i8.i8( + , + i8, + i32); + +define @intrinsic_vslide1down_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv16i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv16i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv32i8.i8( + , + i8, + i32); + +define @intrinsic_vslide1down_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv32i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv32i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv64i8.i8( + , + i8, + i32); + +define @intrinsic_vslide1down_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv64i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv64i8.i8( + , + , + i8, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv1i16.i16( + , + i16, + i32); + +define @intrinsic_vslide1down_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i16_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv1i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv1i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i16_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv2i16.i16( + , + i16, + i32); + +define @intrinsic_vslide1down_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i16_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv2i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv2i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i16_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv4i16.i16( + , + i16, + i32); + +define @intrinsic_vslide1down_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv4i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv4i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv8i16.i16( + , + i16, + i32); + +define @intrinsic_vslide1down_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv8i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv8i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vslide1down.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv16i16.i16( + , + i16, + i32); + +define @intrinsic_vslide1down_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv16i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv16i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv32i16.i16( + , + i16, + i32); + +define @intrinsic_vslide1down_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv32i16.i16( + %0, + i16 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv32i16.i16( + , + , + i16, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv1i32.i32( + , + i32, + i32); + +define @intrinsic_vslide1down_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i32_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv1i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv1i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i32_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv2i32.i32( + , + i32, + i32); + +define @intrinsic_vslide1down_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv2i32.i32( + %0, + 
i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv2i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv4i32.i32( + , + i32, + i32); + +define @intrinsic_vslide1down_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv4i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv4i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv8i32.i32( + , + i32, + i32); + +define @intrinsic_vslide1down_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv8i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv8i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv16i32.i32( + , + i32, + i32); + +define @intrinsic_vslide1down_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv16i32.i32( + %0, + i32 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv16i32.i32( + , + , + i32, + , + i32); + +define @intrinsic_vslide1down_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i32 %4) + + 
ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll new file mode 100644 index 0000000000000..c86475c750567 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll @@ -0,0 +1,978 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vslide1down.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1down_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i8_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i8_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1down_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i8_nxv2i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i8_nxv2i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1down_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i8_nxv4i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i8_nxv4i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1down_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i8_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i8_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1down_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1down_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv64i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1down_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv64i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv64i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) 
+entry: + %a = call @llvm.riscv.vslide1down.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1down_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i16_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i16_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1down_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i16_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv2i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i16_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1down_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1down_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare 
@llvm.riscv.vslide1down.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1down_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv32i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1down_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv32i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv32i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1down_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i32_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i32_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vslide1down.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1down_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1down_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1down_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv16i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1down_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv16i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv16i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 
%4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1down.nxv8i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v16, a0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.nxv8i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1down.mask.nxv8i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: vslide1down.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv8i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll new file mode 100644 index 0000000000000..29e2421732641 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vslide1up.nxv1i8.i8( + , + i8, + i32); + +define @intrinsic_vslide1up_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i8_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv1i8.i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll new file mode 100644 index 0000000000000..f514b60528917 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll @@ -0,0 +1,1000 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vslide1up.nxv1i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1up_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i8_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv1i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv1i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i8_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv1i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv2i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1up_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i8_nxv2i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v 
v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv2i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv2i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i8_nxv2i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv2i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv4i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1up_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i8_nxv4i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv4i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv4i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i8_nxv4i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv4i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv8i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1up_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i8_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv8i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv8i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i8_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv16i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1up_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v26, v16, a0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv16i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv16i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vslide1up.nxv32i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1up_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v28, v16, a0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv32i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv32i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv64i8.i8( + , + i8, + i64); + +define @intrinsic_vslide1up_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv64i8.i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv64i8.i8( + , + , + i8, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv1i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1up_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i16_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv1i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv1i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i16_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv1i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv2i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1up_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i16_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv2i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv2i16.i16( + , + , + i16, + , + 
i64); + +define @intrinsic_vslide1up_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i16_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv2i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv4i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1up_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv4i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv4i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv8i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1up_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v26, v16, a0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv8i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv8i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv16i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1up_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v28, v16, a0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv16i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv16i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv32i16.i16( + , + i16, + i64); + +define @intrinsic_vslide1up_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) 
nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv32i16.i16( + %0, + i16 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv32i16.i16( + , + , + i16, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv1i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1up_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i32_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv1i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv1i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i32_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv1i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv2i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1up_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv2i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv2i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv4i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1up_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v26, v16, a0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv4i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv4i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 
%4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv8i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1up_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v28, v16, a0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv8i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv8i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv16i32.i32( + , + i32, + i64); + +define @intrinsic_vslide1up_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv16i32.i32( + %0, + i32 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv16i32.i32( + , + , + i32, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv1i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v25, v16, a0 +; CHECK-NEXT: vmv1r.v v16, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv1i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v17, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv2i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { +; 
CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v26, v16, a0 +; CHECK-NEXT: vmv2r.v v16, v26 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv2i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv2i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v18, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv4i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v28, v16, a0 +; CHECK-NEXT: vmv4r.v v16, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv4i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv4i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v20, a0, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv8i64.i64( + , + i64, + i64); + +define @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.nxv8i64.i64( + %0, + i64 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslide1up.mask.nxv8i64.i64( + , + , + i64, + , + i64); + +define @intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vslide1up.mask.nxv8i64.i64( + %0, + %1, + i64 %2, + %3, + i64 %4) + + ret %a +} From 42687839980308bbed8fe909b9810a0fb48f9813 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 22 Dec 2020 00:01:46 -0800 Subject: [PATCH 146/378] [RISCV] Add intrinsics for vwmacc[u|su|us] instructions This patch defines vwmacc[u|su|us] intrinsics and lower to V instructions. We work with @rogfer01 from BSC to come out this patch. 
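For reference, the unmasked form of one of the new intrinsics can be exercised
from IR roughly as follows. This is an illustrative sketch, not a file from the
patch: the function name @example and the RV64 i64 vl type are chosen for the
example, and the scalable-vector operand types are written out explicitly.

  ; vwmacc.vv: vd[i] = vd[i] + sext(vs1[i]) * sext(vs2[i]); destination
  ; elements are twice as wide as the source elements.
  declare <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8(
    <vscale x 1 x i16>,   ; vd, the accumulator and result
    <vscale x 1 x i8>,    ; vs1
    <vscale x 1 x i8>,    ; vs2
    i64)                  ; vl

  define <vscale x 1 x i16> @example(<vscale x 1 x i16> %acc, <vscale x 1 x i8> %a,
                                     <vscale x 1 x i8> %b, i64 %vl) {
    %r = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8(
               <vscale x 1 x i16> %acc, <vscale x 1 x i8> %a,
               <vscale x 1 x i8> %b, i64 %vl)
    ret <vscale x 1 x i16> %r
  }

The masked variants take one extra operand, a <vscale x 1 x i1> mask placed
immediately before the vl operand, matching the vwmacc.vv ... v0.t encodings
checked in the tests below.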
Authored-by: Roger Ferrer Ibanez Co-Authored-by: ShihPo Hung Differential Revision: https://reviews.llvm.org/D93675 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 23 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 63 + llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll | 1034 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll | 1412 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll | 1034 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll | 1412 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll | 1034 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll | 1412 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll | 516 ++++++ llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll | 704 ++++++++ 10 files changed, 8644 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index f2aed28440ccb..ba0929b16ea59 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -338,6 +338,20 @@ let TargetPrefix = "riscv" in { [IntrNoMem]>, RISCVVIntrinsic { let ExtendOperand = 2; } + class RISCVTernaryWideNoMask + : Intrinsic< [llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty, + llvm_anyint_ty], + [IntrNoMem] >, RISCVVIntrinsic { + let ExtendOperand = 2; + } + class RISCVTernaryWideMask + : Intrinsic< [llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } multiclass RISCVUSLoad { def "int_riscv_" # NAME : RISCVUSLoad; @@ -406,6 +420,10 @@ let TargetPrefix = "riscv" in { def "int_riscv_" # NAME : RISCVCompareNoMask; def "int_riscv_" # NAME # "_mask" : RISCVCompareMask; } + multiclass RISCVTernaryWide { + def "int_riscv_" # NAME : RISCVTernaryWideNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMask; + } defm vle : RISCVUSLoad; defm vleff : RISCVUSLoad; @@ -481,6 +499,11 @@ let TargetPrefix = "riscv" in { defm vmadd : RISCVTernaryAAXA; defm vnmsub : RISCVTernaryAAXA; + defm vwmaccu : RISCVTernaryWide; + defm vwmacc : RISCVTernaryWide; + defm vwmaccus : RISCVTernaryWide; + defm vwmaccsu : RISCVTernaryWide; + defm vfadd : RISCVBinaryAAX; defm vfsub : RISCVBinaryAAX; defm vfrsub : RISCVBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 7a3e33d6d7db6..f3b6d2f5867f8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -890,6 +890,18 @@ multiclass VPseudoTernaryV_VX_AAXA { defm _VX : VPseudoTernary; } +multiclass VPseudoTernaryW_VV { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m in + defm _VV : VPseudoTernary; +} + +multiclass VPseudoTernaryW_VX { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m in + defm _VX : VPseudoTernary; +} + multiclass VPseudoTernaryV_VI { foreach m = MxList.m in defm _VI : VPseudoTernary; @@ -905,6 +917,11 @@ 
multiclass VPseudoTernaryV_VX_VI; } +multiclass VPseudoTernaryW_VV_VX { + defm "" : VPseudoTernaryW_VV; + defm "" : VPseudoTernaryW_VX; +} + multiclass VPseudoBinaryM_VV_VX_VI { defm "" : VPseudoBinaryM_VV; defm "" : VPseudoBinaryM_VX; @@ -1591,6 +1608,30 @@ multiclass VPatTernaryV_VI; } +multiclass VPatTernaryW_VV vtilist> { + foreach vtiToWti = vtilist in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + defm : VPatTernary; + } +} + +multiclass VPatTernaryW_VX vtilist> { + foreach vtiToWti = vtilist in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + defm : VPatTernary; + } +} + multiclass VPatTernaryV_VV_VX_AAXA vtilist> { defm "" : VPatTernaryV_VV; @@ -1611,6 +1652,12 @@ multiclass VPatBinaryM_VV_VX_VI; } +multiclass VPatTernaryW_VV_VX vtilist> { + defm "" : VPatTernaryW_VV; + defm "" : VPatTernaryW_VX; +} + multiclass VPatBinaryM_VV_VX vtilist> { @@ -1805,6 +1852,14 @@ defm PseudoVNMSAC : VPseudoTernaryV_VV_VX_AAXA; defm PseudoVMADD : VPseudoTernaryV_VV_VX_AAXA; defm PseudoVNMSUB : VPseudoTernaryV_VV_VX_AAXA; +//===----------------------------------------------------------------------===// +// 12.14. Vector Widening Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVWMACCU : VPseudoTernaryW_VV_VX; +defm PseudoVWMACC : VPseudoTernaryW_VV_VX; +defm PseudoVWMACCSU : VPseudoTernaryW_VV_VX; +defm PseudoVWMACCUS : VPseudoTernaryW_VX; + //===----------------------------------------------------------------------===// // 12.17. Vector Integer Move Instructions //===----------------------------------------------------------------------===// @@ -2173,6 +2228,14 @@ defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsub", "PseudoVNMSUB", AllInteger defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vmacc", "PseudoVMACC", AllIntegerVectors>; defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsac", "PseudoVNMSAC", AllIntegerVectors>; +//===----------------------------------------------------------------------===// +// 12.14. Vector Widening Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmaccu", "PseudoVWMACCU", AllWidenableIntVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmacc", "PseudoVWMACC", AllWidenableIntVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmaccsu", "PseudoVWMACCSU", AllWidenableIntVectors>; +defm "" : VPatTernaryW_VX<"int_riscv_vwmaccus", "PseudoVWMACCUS", AllWidenableIntVectors>; + //===----------------------------------------------------------------------===// // 12.17. 
Vector Integer Move Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll new file mode 100644 index 0000000000000..539177f8d78e2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll @@ -0,0 +1,1034 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vwmacc.nxv1i16.nxv1i8( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv1i16.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv1i16.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv1i16.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv2i16.nxv2i8( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv2i16_nxv2i8_nxv2i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv2i16_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv2i16.nxv2i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv2i16.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv2i16_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv2i16_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv2i16.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv4i16.nxv4i8( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv4i16_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv4i16_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv4i16.nxv4i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv4i16.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv4i16_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv4i16_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv4i16.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv8i16.nxv8i8( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv8i16_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vwmacc_vv_nxv8i16_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv8i16.nxv8i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv8i16.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv8i16_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv8i16_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv8i16.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv16i16.nxv16i8( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv16i16_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv16i16_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv16i16.nxv16i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv16i16.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv16i16_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv16i16_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv16i16.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv32i16.nxv32i8( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv32i16_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv32i16_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv32i16.nxv32i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv32i16.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv32i16_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv32i16_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv32i16.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv1i32.nxv1i16( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv1i32_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv1i32_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv1i32.nxv1i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv1i32.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv1i32_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vwmacc_mask_vv_nxv1i32_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv1i32.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv2i32.nxv2i16( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv2i32.nxv2i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv2i32.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv2i32.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv4i32.nxv4i16( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv4i32.nxv4i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv4i32.nxv4i16( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv4i32.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv8i32.nxv8i16( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv8i32.nxv8i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv8i32.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv8i32.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv16i32.nxv16i16( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli 
a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv16i32.nxv16i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv16i32.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vwmacc_mask_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv16i32.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv1i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmacc_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmacc.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv1i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv1i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmacc_mask_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmacc.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv1i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv2i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmacc_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmacc.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv2i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv2i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmacc_mask_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmacc.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv2i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmacc.nxv4i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmacc_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmacc.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.nxv4i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.mask.nxv4i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmacc_mask_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_mask_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmacc.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmacc.mask.nxv4i16.i8( + %0, + i8 %1, + %2, + %3, + i32 
 %4)
+
+  ret <vscale x 4 x i16> %a
+}
+
+; Autogenerated vwmacc.vx test cases (unmasked and masked) for the remaining
+; rv32 combinations follow the same pattern: nxv8i16, nxv16i16 and nxv32i16
+; with an i8 scalar, and nxv1i32 through nxv16i32 with an i16 scalar, each
+; checking the expected vsetvli and vwmacc.vx sequence.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll
new file mode 100644
index 0000000000000..a6ed911b77365
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll
@@ -0,0 +1,1412 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+; Autogenerated declare/define pairs and CHECK lines for llvm.riscv.vwmacc and
+; llvm.riscv.vwmacc.mask in their vv and vx forms, covering nxv1i16-nxv32i16,
+; nxv1i32-nxv16i32 and nxv1i64-nxv8i64 result types with an i64 vector length.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll
new file mode 100644
index 0000000000000..2d39ba95db2ef
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll
@@ -0,0 +1,1034 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+; Autogenerated declare/define pairs and CHECK lines for llvm.riscv.vwmaccsu
+; and llvm.riscv.vwmaccsu.mask in their vv and vx forms, covering
+; nxv1i16-nxv32i16 and nxv1i32-nxv16i32 result types with an i32 vector length.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll
new file mode 100644
index 0000000000000..c274246b7b530
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll
@@ -0,0 +1,1412 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+; Autogenerated vwmaccsu.vv test cases (unmasked and masked) for nxv1i16
+; through nxv32i16 and the unmasked nxv1i32 case mirror vwmaccsu-rv32.ll,
+; with an i64 vector length.
+
+declare <vscale x 1 x i32> @llvm.riscv.vwmaccsu.mask.nxv1i32.nxv1i16(
+  <vscale x 1 x i32>,
+  <vscale x 1 x i16>,
+  <vscale x 1 x i16>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i32> @intrinsic_vwmaccsu_mask_vv_nxv1i32_nxv1i16_nxv1i16(<vscale x 1 x i32> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv1i32_nxv1i16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vwmaccsu.vv v16, v17, v18, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.mask.nxv1i32.nxv1i16(
+    <vscale x 1 x i32> %0,
+    <vscale x 1 x i16> %1,
+    <vscale x 1 x i16> %2,
+ %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv2i32.nxv2i16( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv2i32.nxv2i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv2i32.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv2i32.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv4i32.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv4i32.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv8i32.nxv8i16( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv8i32.nxv8i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv8i32.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv8i32.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv16i32.nxv16i16( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv16i32.nxv16i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare 
@llvm.riscv.vwmaccsu.mask.nxv16i32.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv16i32.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv1i64.nxv1i32( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv1i64_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv1i64_nxv1i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv1i64.nxv1i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv1i64.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv1i64_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv1i64_nxv1i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv1i64.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv2i64.nxv2i32( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv2i64_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv2i64_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv2i64.nxv2i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv2i64.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv2i64_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv2i64_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv2i64.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv4i64.nxv4i32( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv4i64_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv4i64_nxv4i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv4i64.nxv4i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv4i64.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv4i64_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv4i64_nxv4i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv4i64.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vwmaccsu.nxv8i64.nxv8i32( + , + , + , + i64); + +define @intrinsic_vwmaccsu_vv_nxv8i64_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vv_nxv8i64_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv8i64.nxv8i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv8i64.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vv_nxv8i64_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vv_nxv8i64_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv8i64.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv1i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv1i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv1i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv1i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv2i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv2i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv2i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv2i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv4i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv4i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare 
@llvm.riscv.vwmaccsu.mask.nxv4i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv4i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv8i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv8i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv8i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv8i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv16i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv16i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv16i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv16i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv32i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv32i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv32i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv32i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv1i32.i16( + , + i16, + , + i64); + 
+define @intrinsic_vwmaccsu_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv1i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv1i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv1i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv2i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv2i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv2i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv2i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv4i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv4i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv4i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv4i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv8i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv8i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv8i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv8i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv16i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv16i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv16i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv16i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv1i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv1i64_i32_nxv1i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv1i64_i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv1i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv1i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv1i64_i32_nxv1i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv1i64_i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv1i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv2i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv2i64_i32_nxv2i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv2i64_i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv2i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv2i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv2i64_i32_nxv2i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv2i64_i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv2i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv4i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv4i64_i32_nxv4i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv4i64_i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, 
a1, e32,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv4i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv4i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv4i64_i32_nxv4i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv4i64_i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv4i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.nxv8i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccsu_vx_nxv8i64_i32_nxv8i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_vx_nxv8i64_i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.nxv8i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccsu.mask.nxv8i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccsu_mask_vx_nxv8i64_i32_nxv8i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccsu_mask_vx_nxv8i64_i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccsu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccsu.mask.nxv8i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll new file mode 100644 index 0000000000000..2bc594e82d6b5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll @@ -0,0 +1,1034 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vwmaccu.nxv1i16.nxv1i8( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i16.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i16.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i16.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i16.nxv2i8( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv2i16_nxv2i8_nxv2i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv2i16_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 
0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i16.nxv2i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i16.nxv2i8( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv2i16_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv2i16_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i16.nxv2i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i16.nxv4i8( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv4i16_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv4i16_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i16.nxv4i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i16.nxv4i8( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv4i16_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv4i16_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i16.nxv4i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i16.nxv8i8( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv8i16_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv8i16_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i16.nxv8i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i16.nxv8i8( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv8i16_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv8i16_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i16.nxv8i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i16.nxv16i8( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv16i16_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv16i16_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i16.nxv16i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i16.nxv16i8( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv16i16_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv16i16_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i16.nxv16i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv32i16.nxv32i8( + , + , + , + i32); + +define 
@intrinsic_vwmaccu_vv_nxv32i16_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv32i16_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv32i16.nxv32i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv32i16.nxv32i8( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv32i16_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv32i16_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv32i16.nxv32i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i32.nxv1i16( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv1i32_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv1i32_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i32.nxv1i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i32.nxv1i16( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv1i32_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv1i32_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i32.nxv1i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i32.nxv2i16( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i32.nxv2i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i32.nxv2i16( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i32.nxv2i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i32.nxv4i16( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i32.nxv4i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i32.nxv4i16( + , + , + 
, + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i32.nxv4i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i32.nxv8i16( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i32.nxv8i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i32.nxv8i16( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i32.nxv8i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i32.nxv16i16( + , + , + , + i32); + +define @intrinsic_vwmaccu_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i32.nxv16i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i32.nxv16i16( + , + , + , + , + i32); + +define @intrinsic_vwmaccu_mask_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i32.nxv16i16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare 
@llvm.riscv.vwmaccu.nxv2i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: 
vwmaccu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv32i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv32i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv32i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv32i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vwmaccu.nxv4i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccu_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccu_mask_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll new file mode 100644 index 0000000000000..be5d1779a22c9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll @@ -0,0 +1,1412 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vwmaccu.nxv1i16.nxv1i8( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i16.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare 
@llvm.riscv.vwmaccu.mask.nxv1i16.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i16.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i16.nxv2i8( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv2i16_nxv2i8_nxv2i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv2i16_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i16.nxv2i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i16.nxv2i8( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv2i16_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv2i16_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i16.nxv2i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i16.nxv4i8( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv4i16_nxv4i8_nxv4i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv4i16_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i16.nxv4i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i16.nxv4i8( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv4i16_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv4i16_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i16.nxv4i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i16.nxv8i8( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv8i16_nxv8i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv8i16_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i16.nxv8i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i16.nxv8i8( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv8i16_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv8i16_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i16.nxv8i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i16.nxv16i8( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv16i16_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv16i16_nxv16i8_nxv16i8: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i16.nxv16i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i16.nxv16i8( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv16i16_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv16i16_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i16.nxv16i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv32i16.nxv32i8( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv32i16_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv32i16_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv32i16.nxv32i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv32i16.nxv32i8( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv32i16_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv32i16_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv32i16.nxv32i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i32.nxv1i16( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv1i32_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv1i32_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i32.nxv1i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i32.nxv1i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv1i32_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv1i32_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i32.nxv1i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i32.nxv2i16( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i32.nxv2i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i32.nxv2i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv2i32_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vwmaccu_mask_vv_nxv2i32_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i32.nxv2i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i32.nxv4i16( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i32.nxv4i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i32.nxv4i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv4i32_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv4i32_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i32.nxv4i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i32.nxv8i16( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i32.nxv8i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i32.nxv8i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv8i32_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv8i32_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i32.nxv8i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i32.nxv16i16( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i32.nxv16i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i32.nxv16i16( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv16i32_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv16i32_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i32.nxv16i16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i64.nxv1i32( + , + , + , + i64); + +define 
@intrinsic_vwmaccu_vv_nxv1i64_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv1i64_nxv1i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i64.nxv1i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i64.nxv1i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv1i64_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv1i64_nxv1i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i64.nxv1i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i64.nxv2i32( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv2i64_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv2i64_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i64.nxv2i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i64.nxv2i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv2i64_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv2i64_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i64.nxv2i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i64.nxv4i32( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv4i64_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv4i64_nxv4i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i64.nxv4i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i64.nxv4i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv4i64_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv4i64_nxv4i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i64.nxv4i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i64.nxv8i32( + , + , + , + i64); + +define @intrinsic_vwmaccu_vv_nxv8i64_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vv_nxv8i64_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i64.nxv8i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i64.nxv8i32( + , + , + , + , + i64); + +define @intrinsic_vwmaccu_mask_vv_nxv8i64_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { 
+; CHECK-LABEL: intrinsic_vwmaccu_mask_vv_nxv8i64_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccu.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i64.nxv8i32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18 +; 
CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv32i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv32i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv32i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv32i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i32.i16( + %0, 
+ i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv16i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv16i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv16i32.i16( + , + i16, + , + , + i64); + +define 
@intrinsic_vwmaccu_mask_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv16i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv1i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv1i64_i32_nxv1i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv1i64_i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv1i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv1i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv1i64_i32_nxv1i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv1i64_i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv1i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv2i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv2i64_i32_nxv2i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv2i64_i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv2i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv2i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv2i64_i32_nxv2i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv2i64_i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv2i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv4i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv4i64_i32_nxv4i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_vx_nxv4i64_i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv4i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv4i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv4i64_i32_nxv4i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv4i64_i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv4i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccu.nxv8i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccu_vx_nxv8i64_i32_nxv8i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vwmaccu_vx_nxv8i64_i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.nxv8i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccu.mask.nxv8i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccu_mask_vx_nxv8i64_i32_nxv8i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccu_mask_vx_nxv8i64_i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccu.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccu.mask.nxv8i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll new file mode 100644 index 0000000000000..b9e0207f381ab --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll @@ -0,0 +1,516 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vwmaccus.nxv1i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv1i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv1i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv1i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv2i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv2i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv2i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv2i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv4i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; 
CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv4i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv4i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv4i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv8i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv8i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv8i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv8i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv16i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv16i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv16i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv16i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv32i16.i8( + , + i8, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv32i16.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv32i16.i8( + , + i8, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: 
+ %a = call @llvm.riscv.vwmaccus.mask.nxv32i16.i8( + %0, + i8 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv1i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv1i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv1i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv1i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv2i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv2i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv2i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv2i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv4i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv4i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv4i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv4i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv8i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv8i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv8i32.i16( + , + i16, + , + , + i32); + +define 
@intrinsic_vwmaccus_mask_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv8i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv16i32.i16( + , + i16, + , + i32); + +define @intrinsic_vwmaccus_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv16i32.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv16i32.i16( + , + i16, + , + , + i32); + +define @intrinsic_vwmaccus_mask_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv16i32.i16( + %0, + i16 %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll new file mode 100644 index 0000000000000..56964b8819d7b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll @@ -0,0 +1,704 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vwmaccus.nxv1i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv1i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv1i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv1i16_i8_nxv1i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv1i16_i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv1i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv2i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv2i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv2i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv2i16_i8_nxv2i8( %0, i8 %1, %2, %3, i64 %4) nounwind { 
+; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv2i16_i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv2i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv4i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv4i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv4i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv4i16_i8_nxv4i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv4i16_i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv4i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv8i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv8i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv8i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv8i16_i8_nxv8i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv8i16_i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv8i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv16i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv16i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv16i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv16i16_i8_nxv16i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv16i16_i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv16i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv32i16.i8( + , + i8, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, 
v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv32i16.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv32i16.i8( + , + i8, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv32i16_i8_nxv32i8( %0, i8 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv32i16_i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e8,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv32i16.i8( + %0, + i8 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv1i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv1i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv1i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv1i32_i16_nxv1i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv1i32_i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv1i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv2i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv2i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv2i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv2i32_i16_nxv2i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv2i32_i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv2i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv4i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv4i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv4i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv4i32_i16_nxv4i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv4i32_i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv4i32.i16( + %0, + i16 
%1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv8i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv8i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv8i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv8i32_i16_nxv8i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv8i32_i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv8i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv16i32.i16( + , + i16, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv16i32.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv16i32.i16( + , + i16, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv16i32_i16_nxv16i16( %0, i16 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv16i32_i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e16,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv16i32.i16( + %0, + i16 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv1i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv1i64_i32_nxv1i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv1i64_i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv1i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv1i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv1i64_i32_nxv1i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv1i64_i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv1i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv2i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv2i64_i32_nxv2i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv2i64_i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv2i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + 
+declare @llvm.riscv.vwmaccus.mask.nxv2i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv2i64_i32_nxv2i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv2i64_i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv2i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv4i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv4i64_i32_nxv4i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv4i64_i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv4i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv4i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv4i64_i32_nxv4i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv4i64_i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv4i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vwmaccus.nxv8i64.i32( + , + i32, + , + i64); + +define @intrinsic_vwmaccus_vx_nxv8i64_i32_nxv8i32( %0, i32 %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_vx_nxv8i64_i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.nxv8i64.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmaccus.mask.nxv8i64.i32( + , + i32, + , + , + i64); + +define @intrinsic_vwmaccus_mask_vx_nxv8i64_i32_nxv8i32( %0, i32 %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vwmaccus_mask_vx_nxv8i64_i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vsetvli a1, a2, e32,m4,ta,mu +; CHECK-NEXT: vwmaccus.vx v16, a0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vwmaccus.mask.nxv8i64.i32( + %0, + i32 %1, + %2, + %3, + i64 %4) + + ret %a +} From ad0a7ad950fec5e422e84f0d3f8942d5e1b116f6 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 22 Dec 2020 04:50:19 -0800 Subject: [PATCH 147/378] [RISCV] Add intrinsics for vf[n]macc/vf[n]msac/vf[n]madd/vf[n]msub instructions This patch defines vfmadd/vfnmacc, vfmsac/vfnmsac, vfmadd/vfnmadd, and vfmsub/vfnmsub lower to V instructions. 
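For reference, a minimal sketch of how the unmasked form of one of the new intrinsics is used at the IR level. The function name is illustrative, the scalable vector types are spelled out as inferred from the intrinsic's name mangling, and the trailing i32 operand is the vector length (XLEN-sized, so i64 on RV64):

    declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
      <vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>, i32)

    define <vscale x 1 x half> @vfmacc_example(<vscale x 1 x half> %vd,
                                               <vscale x 1 x half> %vs1,
                                               <vscale x 1 x half> %vs2,
                                               i32 %vl) {
      ; vd[i] <- vs1[i] * vs2[i] + vd[i] for the first %vl elements; this is
      ; expected to select vfmacc.vv with %vd as the tied destination operand.
      %r = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
                  <vscale x 1 x half> %vd,
                  <vscale x 1 x half> %vs1,
                  <vscale x 1 x half> %vs2,
                  i32 %vl)
      ret <vscale x 1 x half> %r
    }

The masked variants take an additional <vscale x N x i1> mask operand, as exercised by the *_mask_* tests below.
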
Authored-by: Roger Ferrer Ibanez Co-Authored-by: ShihPo Hung Differential Revision: https://reviews.llvm.org/D93691 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 9 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 36 +- llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll | 1142 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll | 1142 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll | 1142 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll | 1142 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll | 1142 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll | 1142 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll | 1142 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll | 856 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll | 1142 +++++++++++++++++ 18 files changed, 16024 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index ba0929b16ea59..dc080db5b30f5 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -551,6 +551,15 @@ let TargetPrefix = "riscv" in { defm vfwmul : RISCVBinaryABX; + defm vfmacc : RISCVTernaryAAXA; + defm vfnmacc : RISCVTernaryAAXA; + defm vfmsac : RISCVTernaryAAXA; + defm vfnmsac : RISCVTernaryAAXA; + defm vfmadd : RISCVTernaryAAXA; + defm vfnmadd : RISCVTernaryAAXA; + defm vfmsub : RISCVTernaryAAXA; + defm vfnmsub : RISCVTernaryAAXA; + defm vfsgnj : RISCVBinaryAAX; defm vfsgnjn : RISCVBinaryAAX; defm vfsgnjx : RISCVBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index f3b6d2f5867f8..5c858b0e09825 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -885,9 +885,10 @@ multiclass VPseudoTernaryV_VX { defm _VX : VPseudoTernary; } -multiclass VPseudoTernaryV_VX_AAXA { +multiclass VPseudoTernaryV_VX_AAXA { foreach m = MxList.m in - defm _VX : VPseudoTernary; + defm !if(IsFloat, "_VF", "_VX") : VPseudoTernary; } multiclass 
VPseudoTernaryW_VV { @@ -907,9 +908,9 @@ multiclass VPseudoTernaryV_VI { defm _VI : VPseudoTernary; } -multiclass VPseudoTernaryV_VV_VX_AAXA { +multiclass VPseudoTernaryV_VV_VX_AAXA { defm "" : VPseudoTernaryV_VV; - defm "" : VPseudoTernaryV_VX_AAXA; + defm "" : VPseudoTernaryV_VX_AAXA; } multiclass VPseudoTernaryV_VX_VI { @@ -1593,7 +1594,8 @@ multiclass VPatTernaryV_VX vtilist> { foreach vti = vtilist in - defm : VPatTernary; @@ -1939,6 +1941,18 @@ defm PseudoVFRDIV : VPseudoBinaryV_VX; //===----------------------------------------------------------------------===// defm PseudoVFWMUL : VPseudoBinaryW_VV_VX; +//===----------------------------------------------------------------------===// +// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFMACC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVFNMACC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVFMSAC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVFNMSAC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVFMADD : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVFNMADD : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVFMSUB : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVFNMSUB : VPseudoTernaryV_VV_VX_AAXA; + //===----------------------------------------------------------------------===// // 14.12. Vector Floating-Point Sign-Injection Instructions //===----------------------------------------------------------------------===// @@ -2320,6 +2334,18 @@ defm "" : VPatBinaryV_VX<"int_riscv_vfrdiv", "PseudoVFRDIV", AllFloatVectors>; //===----------------------------------------------------------------------===// defm "" : VPatBinaryW_VV_VX<"int_riscv_vfwmul", "PseudoVFWMUL", AllWidenableFloatVectors>; +//===----------------------------------------------------------------------===// +// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmacc", "PseudoVFMACC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmacc", "PseudoVFNMACC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsac", "PseudoVFMSAC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsac", "PseudoVFNMSAC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmadd", "PseudoVFMADD", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>; + //===----------------------------------------------------------------------===// // 14.12. 
Vector Floating-Point Sign-Injection Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll new file mode 100644 index 0000000000000..44f0ecb2f0d5f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f16.nxv8f16( + , + , + , + i32); + +define 
@intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( 
%0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16( 
%0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, 
%2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32( 
%0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll new file mode 100644 index 0000000000000..6b83445292a3c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; 
CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, 
v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfmacc.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = 
call @llvm.riscv.vfmacc.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfmacc.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) 
+entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmacc.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 
0(ra) +entry: + %a = call @llvm.riscv.vfmacc.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmacc.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmacc.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll new file mode 100644 index 0000000000000..344a21bb08950 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define 
@intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f32.nxv2f32( + , + , + , + i32); + +define 
@intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define 
@intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv16f16.f16( + , + half, + , + i32); + +define 
@intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define 
@intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll new file mode 100644 index 0000000000000..993f535777e05 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: 
vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: 
vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, 
a1, e16,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: 
vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 
+; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmadd.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmadd.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmadd.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll new file mode 100644 index 0000000000000..d86f9462c6f2a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f32.nxv1f32( + %0, + %1, + 
%2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare 
@llvm.riscv.vfmsac.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare 
@llvm.riscv.vfmsac.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 
%4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll new file mode 100644 index 0000000000000..03364aba84307 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f16.nxv2f16( + , + , + , + i64); + +define 
@intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: 
vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, 
e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsac.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsac.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsac.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll new file mode 100644 index 0000000000000..1d8cb060e3222 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, 
v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) 
+entry: + %a = call @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare 
@llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f16.f16( + %0, + 
half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f32.f32( + %0, + float %1, + 
%2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll new file mode 100644 index 0000000000000..347731829812e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare 
@llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, 
e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; 
CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind 
{ +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmsub.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmsub.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfmsub.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll new file mode 100644 index 0000000000000..d90664c7eb372 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: 
vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: 
jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll new file mode 100644 index 0000000000000..9e113de8e9ca7 --- /dev/null +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = 
call @llvm.riscv.vfnmacc.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a 
= call @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmacc.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu 
+; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: 
vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmacc.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmacc.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmacc.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll new file mode 100644 index 0000000000000..abea8d688fa31 --- /dev/null +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = 
call @llvm.riscv.vfnmadd.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a 
= call @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, 
ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu 
+; CHECK-NEXT: vfnmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; 
CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll new file mode 100644 index 0000000000000..4b4b8136fe5a4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmadd.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = 
call @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmadd.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmadd.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; 
CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf 
v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli 
a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmadd.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmadd.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmadd.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmadd.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll new file mode 100644 index 0000000000000..7b090881a3360 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmsac.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = 
call @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmsac.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr 
zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf 
v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll new file mode 100644 index 0000000000000..23d83c29e247f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f16.nxv2f16( + 
%0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare 
@llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + 
%a = call @llvm.riscv.vfnmsac.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, 
v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, 
e64,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsac.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsac.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsac.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll new file mode 100644 index 0000000000000..4cbeb71a86533 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + 
%a = call @llvm.riscv.vfnmsub.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + 
%a = call @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare 
@llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmsub.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv8f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv16f16.f16( + , + half, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr 
zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv8f32.f32( + , + float, + , + , + i32); + +define @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll new file mode 100644 index 0000000000000..c25fee55ba368 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll @@ -0,0 +1,1142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmsub.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv16f16.nxv16f16( + , + , + , + i64); + +define 
@intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define 
@intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v18, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f64.nxv4f64( + , + , + , + i64); + +define 
@intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsub.vv v16, v20, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f16.f16( + %0, + half %1, + %2, + 
i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv8f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv16f16.f16( + , + half, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv16f16.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfnmsub.mask.nxv1f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv8f32.f32( + , + float, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17 +; CHECK-NEXT: jalr 
zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv1f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv1f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv2f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv2f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfnmsub.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfnmsub.mask.nxv4f64.f64( + , + double, + , + , + i64); + +define @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu +; CHECK-NEXT: vfnmsub.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfnmsub.mask.nxv4f64.f64( + %0, + double %1, + %2, + %3, + i64 %4) + + ret %a +} From bac54639c7be602cabffcc3b801316f784f1c4b1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 18 Dec 2020 11:51:59 -0500 Subject: [PATCH 148/378] AMDGPU: Add spilled CSR SGPRs to entry block live ins --- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 14 ++++++++ .../AMDGPU/csr-sgpr-spill-live-ins.mir | 35 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 130edd83bef6e..65c7f49b646c5 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -185,6 +185,16 @@ void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { } } +// TODO: To support shrink wrapping, this would 
need to copy +// PrologEpilogInserter's updateLiveness. +static void updateLiveness(MachineFunction &MF, ArrayRef CSI) { + MachineBasicBlock &EntryBB = MF.front(); + + for (const CalleeSavedInfo &CSIReg : CSI) + EntryBB.addLiveIn(CSIReg.getReg()); + EntryBB.sortUniqueLiveIns(); +} + bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); @@ -222,6 +232,10 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { for (MachineBasicBlock *SaveBlock : SaveBlocks) insertCSRSaves(*SaveBlock, CSI, LIS); + // Add live ins to save blocks. + assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); + updateLiveness(MF, CSI); + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) insertCSRRestores(*RestoreBlock, CSI, LIS); return true; diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir new file mode 100644 index 0000000000000..6bc94455e0a8b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -0,0 +1,35 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx906 -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s + +# Make sure the modified CSR VGPRs are added as live-in to the entry +# block. + +--- +name: def_csr_sgpr +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; CHECK-LABEL: name: def_csr_sgpr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47, $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr43, 1, $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr46, 2, $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr47, 3, $vgpr0 + ; CHECK: S_NOP 0 + ; CHECK: bb.1: + ; CHECK: liveins: $vgpr0 + ; CHECK: $sgpr42 = S_MOV_B32 0 + ; CHECK: $sgpr43 = S_MOV_B32 1 + ; CHECK: $sgpr46_sgpr47 = S_MOV_B64 2 + bb.0: + S_NOP 0 + + bb.1: + $sgpr42 = S_MOV_B32 0 + $sgpr43 = S_MOV_B32 1 + $sgpr46_sgpr47 = S_MOV_B64 2 +... From 8bf9cdeaee4834bcba35322f1d84c57c691d2244 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 18 Dec 2020 10:51:17 -0500 Subject: [PATCH 149/378] AMDGPU: Use Register --- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 65c7f49b646c5..939a9676ad3bd 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -98,7 +98,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { for (const CalleeSavedInfo &CS : CSI) { // Insert the spill to the stack frame. 
- unsigned Reg = CS.getReg(); + MCRegister Reg = CS.getReg(); MachineInstrSpan MIS(I, &SaveBlock); const TargetRegisterClass *RC = @@ -217,7 +217,8 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); for (unsigned I = 0; CSRegs[I]; ++I) { - unsigned Reg = CSRegs[I]; + MCRegister Reg = CSRegs[I]; + if (SavedRegs.test(Reg)) { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, MVT::i32); From 77fb45e59e49d25fbc57854b62599ae24aa2c4c9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 22 Dec 2020 15:51:20 -0500 Subject: [PATCH 150/378] [lld/mac] Add --version flag It's an extension to ld64, but all the other ports have it, and someone asked for it in PR43721. While here, change the COFF help text to match the other ports. Differential Revision: https://reviews.llvm.org/D93491 --- lld/COFF/Options.td | 2 +- lld/MachO/Driver.cpp | 7 ++++++- lld/MachO/Options.td | 4 +++- lld/test/MachO/driver.test | 4 +++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index 2b8e65587ad7c..5447c83121288 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -240,7 +240,7 @@ def lto_obj_path : P< "lto-obj-path", "output native object for merged LTO unit to this path">; def dash_dash_version : Flag<["--"], "version">, - HelpText<"Print version information">; + HelpText<"Display the version number and exit">; def threads : P<"threads", "Number of threads. '1' disables multi-threading. By " "default all available hardware threads are used">; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 82ddcf084dc00..edc9fe001ab5d 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -690,10 +690,15 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, if (args.hasArg(OPT_help_hidden)) { parser.printHelp(argsArr[0], /*showHidden=*/true); return true; - } else if (args.hasArg(OPT_help)) { + } + if (args.hasArg(OPT_help)) { parser.printHelp(argsArr[0], /*showHidden=*/false); return true; } + if (args.hasArg(OPT_version)) { + message(getLLDVersion()); + return true; + } if (const char *path = getReproduceOption(args)) { // Note that --reproduce is a debug option so you can ignore it diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 52a351836a15a..8e88c74efc0e9 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -18,6 +18,8 @@ def reproduce: Separate<["--"], "reproduce">; def reproduce_eq: Joined<["--"], "reproduce=">, Alias(reproduce)>, HelpText<"Write tar file containing inputs and command to reproduce link">; +def version: Flag<["--"], "version">, + HelpText<"Display the version number and exit">; // This is a complete Options.td compiled from Apple's ld(1) manpage @@ -508,7 +510,7 @@ def bitcode_symbol_map : Separate<["-"], "bitcode_symbol_map">, def grp_rare : OptionGroup<"rare">, HelpText<"RARELY USED">; def v : Flag<["-"], "v">, - HelpText<"Print the linker version">, + HelpText<"Print the linker version and search paths and exit">, Group; def version_details : Flag<["-"], "version_details">, HelpText<"Print the linker version in JSON form">, diff --git a/lld/test/MachO/driver.test b/lld/test/MachO/driver.test index 229ec3ef69639..417c71eac561d 100644 --- a/lld/test/MachO/driver.test +++ b/lld/test/MachO/driver.test @@ -1,5 +1,7 @@ -# RUN: not %lld ---help 2>&1 | FileCheck -check-prefix=SPELLHELP %s +# RUN: %lld --version | FileCheck -check-prefix=VERSION %s +VERSION: {{LLD [0-9]+\.[0-9]+}} +# RUN: not %lld ---help 2>&1 | 
FileCheck -check-prefix=SPELLHELP %s SPELLHELP: error: unknown argument '---help', did you mean '--help' # FIXME: This should say "no input files" instead SPELLHELP: error: undefined symbol: _main From 581d13f8aeb66c040d5ea69ad4385f766e1f97c9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 3 Nov 2020 09:50:17 -0500 Subject: [PATCH 151/378] GlobalISel: Return APInt from getConstantVRegVal Returning int64_t was arbitrarily limiting for wide integer types, and the functions should handle the full generality of the IR. Also changes the full form which returns the originally defined vreg. Add another wrapper for the common case of just immediately converting to int64_t (arguably this would be useful for the full return value case as well). One possible issue with this change is some of the existing uses did break without conversion to getConstantVRegSExtVal, and it's possible some without adequate test coverage are now broken. --- .../llvm/CodeGen/GlobalISel/MIPatternMatch.h | 2 +- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 11 +++-- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 28 +++++------ .../GlobalISel/InstructionSelector.cpp | 2 +- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 30 +++++++----- .../GISel/AArch64InstructionSelector.cpp | 47 ++++++++++--------- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +- .../GISel/AArch64PostLegalizerCombiner.cpp | 2 +- .../GISel/AArch64PostLegalizerLowering.cpp | 2 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 20 ++++---- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 24 +++++----- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 +- .../lib/Target/X86/X86InstructionSelector.cpp | 2 +- 13 files changed, 96 insertions(+), 78 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 3357b0e1a6eb2..427906db66961 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -43,7 +43,7 @@ struct ConstantMatch { int64_t &CR; ConstantMatch(int64_t &C) : CR(C) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { - if (auto MaybeCst = getConstantVRegVal(Reg, MRI)) { + if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) { CR = *MaybeCst; return true; } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 9fad903ea16b4..446aaf8c35512 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -121,14 +121,19 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); +/// If \p VReg is defined by a G_CONSTANT, return the corresponding value. +Optional getConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI); + /// If \p VReg is defined by a G_CONSTANT fits in int64_t /// returns it. -Optional getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI); +Optional getConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI); + /// Simple struct used to hold a constant integer value and a virtual /// register. 
struct ValueAndVReg { - int64_t Value; + APInt Value; Register VReg; }; /// If \p VReg is defined by a statically evaluable chain of diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 79f74a47d83c8..90b1dcea26484 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1029,8 +1029,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { unsigned NumBits = Ty.getScalarSizeInBits(); auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); if (!Ty.isVector() && ValVRegAndVal) { - unsigned KnownVal = ValVRegAndVal->Value; - APInt Scalar = APInt(8, KnownVal); + APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); APInt SplatVal = APInt::getSplat(NumBits, Scalar); return MIB.buildConstant(Ty, SplatVal).getReg(0); } @@ -1411,7 +1410,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); if (!LenVRegAndVal) return false; // Leave it to the legalizer to lower it to a libcall. - unsigned KnownLen = LenVRegAndVal->Value; + unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { MI.eraseFromParent(); @@ -1521,7 +1520,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, return false; // Pass the combined immediate to the apply function. - MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value; + MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); MatchInfo.Base = Base; return true; } @@ -1571,7 +1570,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, return false; // Pass the combined immediate to the apply function. - MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value; + MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); MatchInfo.Reg = Base; // There is no simple replacement for a saturating unsigned left shift that @@ -1654,7 +1653,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, if (!MaybeImmVal) return false; - const uint64_t C1Val = MaybeImmVal->Value; + const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { // Shift should match previous one and should be a one-use. 
@@ -1668,7 +1667,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, if (!MaybeImmVal) return false; - ShiftVal = MaybeImmVal->Value; + ShiftVal = MaybeImmVal->Value.getSExtValue(); return true; }; @@ -1738,10 +1737,11 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); auto MaybeImmVal = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); - if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value)) + if (!MaybeImmVal) return false; - ShiftVal = Log2_64(MaybeImmVal->Value); - return true; + + ShiftVal = MaybeImmVal->Value.exactLogBase2(); + return (static_cast(ShiftVal) != -1); } bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, @@ -1787,7 +1787,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, return false; } - int64_t ShiftAmt = MaybeShiftAmtVal->Value; + int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); MatchData.Reg = ExtSrc; MatchData.Imm = ShiftAmt; @@ -2026,7 +2026,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, if (!MaybeImmVal) return false; - ShiftVal = MaybeImmVal->Value; + ShiftVal = MaybeImmVal->Value.getSExtValue(); return ShiftVal >= Size / 2 && ShiftVal < Size; } @@ -2200,7 +2200,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, Register RHS = MI.getOperand(2).getReg(); MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); - if (auto RHSCst = getConstantVRegVal(RHS, MRI)) { + if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { int64_t Cst; if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { NewCst = Cst + *RHSCst; @@ -2441,7 +2441,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { assert(MI.getOpcode() == TargetOpcode::G_SELECT); if (auto MaybeCstCmp = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { - OpIdx = MaybeCstCmp->Value ? 2 : 3; + OpIdx = MaybeCstCmp->Value.isNullValue() ? 
3 : 2; return true; } return false; diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 2fedc034d315f..3eca16808ea61 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -50,7 +50,7 @@ bool InstructionSelector::isOperandImmEqual( const MachineRegisterInfo &MRI) const { if (MO.isReg() && MO.getReg()) if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI)) - return VRegVal->Value == Value; + return VRegVal->Value.getSExtValue() == Value; return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 4d9580de3b7bb..c19ee2e3860fd 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -255,8 +255,8 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } -Optional llvm::getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { +Optional llvm::getConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI) { Optional ValAndVReg = getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); assert((!ValAndVReg || ValAndVReg->VReg == VReg) && @@ -266,6 +266,14 @@ Optional llvm::getConstantVRegVal(Register VReg, return ValAndVReg->Value; } +Optional llvm::getConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI) { + Optional Val = getConstantVRegVal(VReg, MRI); + if (Val && Val->getBitWidth() <= 64) + return Val->getSExtValue(); + return None; +} + Optional llvm::getConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, bool HandleFConstant) { @@ -337,10 +345,7 @@ Optional llvm::getConstantVRegValWithLookThrough( } } - if (Val.getBitWidth() > 64) - return None; - - return ValueAndVReg{Val.getSExtValue(), VReg}; + return ValueAndVReg{Val, VReg}; } const ConstantFP * @@ -413,9 +418,8 @@ Optional llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, if (!MaybeOp1Cst) return None; - LLT Ty = MRI.getType(Op1); - APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); - APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true); + const APInt &C1 = *MaybeOp1Cst; + const APInt &C2 = *MaybeOp2Cst; switch (Opcode) { default: break; @@ -535,13 +539,13 @@ Optional llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, const MachineRegisterInfo &MRI) { auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); if (MaybeOp1Cst) { - LLT Ty = MRI.getType(Op1); - APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); switch (Opcode) { default: break; - case TargetOpcode::G_SEXT_INREG: - return C1.trunc(Imm).sext(C1.getBitWidth()); + case TargetOpcode::G_SEXT_INREG: { + LLT Ty = MRI.getType(Op1); + return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits()); + } } } return None; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 4126017c6fbde..c810fcaca7663 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -574,7 +574,7 @@ static Optional getImmedFromMO(const MachineOperand &Root) { getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); if (!ValAndVReg) return None; - Immed = ValAndVReg->Value; + Immed = ValAndVReg->Value.getSExtValue(); } else return None; return Immed; @@ -1109,8 +1109,8 @@ 
AArch64InstructionSelector::emitSelect(Register Dst, Register True, Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; if (TrueCst && FalseCst) { - auto T = TrueCst->Value; - auto F = FalseCst->Value; + int64_t T = TrueCst->Value.getSExtValue(); + int64_t F = FalseCst->Value.getSExtValue(); if (T == 0 && F == 1) { // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc @@ -1130,7 +1130,7 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True, } if (TrueCst) { - auto T = TrueCst->Value; + int64_t T = TrueCst->Value.getSExtValue(); if (T == 1) { // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; @@ -1151,7 +1151,7 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True, } if (FalseCst) { - auto F = FalseCst->Value; + int64_t F = FalseCst->Value.getSExtValue(); if (F == 1) { // G_SELECT cc, t, 1 -> CSINC t, zreg, cc Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; @@ -1304,7 +1304,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); } if (VRegAndVal) - C = VRegAndVal->Value; + C = VRegAndVal->Value.getSExtValue(); break; } case TargetOpcode::G_ASHR: @@ -1314,7 +1314,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, auto VRegAndVal = getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); if (VRegAndVal) - C = VRegAndVal->Value; + C = VRegAndVal->Value.getSExtValue(); break; } } @@ -1442,10 +1442,13 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) auto MaybeBit = getConstantVRegValWithLookThrough( AndInst.getOperand(2).getReg(), *MIB.getMRI()); - if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value)) + if (!MaybeBit) + return false; + + int32_t Bit = MaybeBit->Value.exactLogBase2(); + if (Bit < 0) return false; - uint64_t Bit = Log2_64(static_cast(MaybeBit->Value)); Register TestReg = AndInst.getOperand(1).getReg(); // Emit a TB(N)Z. @@ -1522,7 +1525,7 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( // Note that we don't want to do this when we have a G_AND because it can // become a tst. The tst will make the test bit in the TB(N)Z redundant. if (VRegAndVal && !AndInst) { - int64_t C = VRegAndVal->Value; + int64_t C = VRegAndVal->Value.getSExtValue(); // When we have a greater-than comparison, we can just test if the msb is // zero. 
@@ -1654,8 +1657,8 @@ static Optional getVectorShiftImm(Register Reg, return None; if (Idx == 1) - ImmVal = VRegAndVal->Value; - if (ImmVal != VRegAndVal->Value) + ImmVal = VRegAndVal->Value.getSExtValue(); + if (ImmVal != VRegAndVal->Value.getSExtValue()) return None; } @@ -2735,7 +2738,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_PTRMASK: { Register MaskReg = I.getOperand(2).getReg(); - Optional MaskVal = getConstantVRegVal(MaskReg, MRI); + Optional MaskVal = getConstantVRegSExtVal(MaskReg, MRI); // TODO: Implement arbitrary cases if (!MaskVal || !isShiftedMask_64(*MaskVal)) return false; @@ -3749,7 +3752,7 @@ bool AArch64InstructionSelector::selectExtractElt( auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value; + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); MachineIRBuilder MIRBuilder(I); @@ -4116,10 +4119,11 @@ AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, // ANDS needs a logical immediate for its immediate form. Check if we can // fold one in. if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { - if (AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize)) { + int64_t Imm = ValAndVReg->Value.getSExtValue(); + + if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); - TstMI.addImm( - AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize)); + TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); return &*TstMI; } @@ -4658,7 +4662,7 @@ bool AArch64InstructionSelector::selectInsertElt( auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value; + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); // Perform the lane insert. Register SrcReg = I.getOperand(1).getReg(); @@ -5198,7 +5202,7 @@ AArch64InstructionSelector::selectExtendedSHL( // The value must fit into 3 bits, and must be positive. Make sure that is // true. - int64_t ImmVal = ValAndVReg->Value; + int64_t ImmVal = ValAndVReg->Value.getSExtValue(); // Since we're going to pull this into a shift, the constant value must be // a power of 2. If we got a multiply, then we need to check this. @@ -5362,7 +5366,7 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); if (ValAndVReg) { unsigned Scale = Log2_32(SizeInBytes); - int64_t ImmOff = ValAndVReg->Value; + int64_t ImmOff = ValAndVReg->Value.getSExtValue(); // Skip immediates that can be selected in the load/store addresing // mode. 
@@ -5821,7 +5825,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); - Optional CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI); + Optional CstVal = + getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); assert(CstVal && "Expected constant value"); MIB.addImm(CstVal.getValue()); } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 73a7b8e7b83ff..0774f7b02dd2b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -837,7 +837,7 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( if (!VRegAndVal) return true; // Check the shift amount is in range for an immediate form. - int64_t Amount = VRegAndVal->Value; + int64_t Amount = VRegAndVal->Value.getSExtValue(); if (Amount > 31) return true; // This will have to remain a register variant. auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index ce298c766e41f..fdd04cb77fad3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -130,7 +130,7 @@ bool matchAArch64MulConstCombine( if (!Const) return false; - const APInt &ConstValue = APInt(Ty.getSizeInBits(), Const->Value, true); + const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits()); // The following code is ported from AArch64ISelLowering. // Multiplication of a power of two plus/minus one can be done more // cheaply as as shift+add/sub. For now, this is true unilaterally. 
If diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 43f28729baa1f..a06ff4b5417a6 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -438,7 +438,7 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); if (!ValAndVReg) return None; - uint64_t C = ValAndVReg->Value; + uint64_t C = ValAndVReg->Value.getZExtValue(); if (isLegalArithImmed(C)) return None; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 6c2ff0972ae5b..ac6ddbae350b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -611,8 +611,10 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( if (ConstSrc1) { auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true); if (ConstSrc0) { - uint32_t Lo16 = static_cast(ConstSrc0->Value) & 0xffff; - uint32_t Hi16 = static_cast(ConstSrc1->Value) & 0xffff; + const int64_t K0 = ConstSrc0->Value.getSExtValue(); + const int64_t K1 = ConstSrc1->Value.getSExtValue(); + uint32_t Lo16 = static_cast(K0) & 0xffff; + uint32_t Hi16 = static_cast(K1) & 0xffff; BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst) .addImm(Lo16 | (Hi16 << 16)); @@ -820,7 +822,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const { // The selector has to be an inline immediate, so we can use whatever for // the other operands. MIB.addReg(Val); - MIB.addImm(ConstSelect->Value & + MIB.addImm(ConstSelect->Value.getSExtValue() & maskTrailingOnes(STI.getWavefrontSizeLog2())); } else { Optional ConstVal = @@ -828,9 +830,9 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const { // If the value written is an inline immediate, we can get away without a // copy to m0. - if (ConstVal && AMDGPU::isInlinableLiteral32(ConstVal->Value, + if (ConstVal && AMDGPU::isInlinableLiteral32(ConstVal->Value.getSExtValue(), STI.hasInv2PiInlineImm())) { - MIB.addImm(ConstVal->Value); + MIB.addImm(ConstVal->Value.getSExtValue()); MIB.addReg(LaneSelect); } else { MIB.addReg(Val); @@ -1101,7 +1103,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true); if (Arg.hasValue()) { - const int64_t Value = Arg.getValue().Value; + const int64_t Value = Arg.getValue().Value.getSExtValue(); if (Value == 0) { unsigned Opcode = Is64 ? 
AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0); @@ -3430,7 +3432,7 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { return Default; Optional Offset = - getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI); + getConstantVRegSExtVal(OpDef->getOperand(2).getReg(), *MRI); if (!Offset.hasValue()) return Default; @@ -3919,7 +3921,7 @@ AMDGPUInstructionSelector::getPtrBaseWithConstantOffset( = getConstantVRegValWithLookThrough(RHS.getReg(), MRI, true); if (!MaybeOffset) return {Root, 0}; - return {RootI->getOperand(1).getReg(), MaybeOffset->Value}; + return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()}; } static void addZeroImm(MachineInstrBuilder &MIB) { @@ -4247,7 +4249,7 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const { static Optional getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) { // getConstantVRegVal sexts any values, so see if that matters. - Optional OffsetVal = getConstantVRegVal(Reg, MRI); + Optional OffsetVal = getConstantVRegSExtVal(Reg, MRI); if (!OffsetVal || !isInt<32>(*OffsetVal)) return None; return Lo_32(*OffsetVal); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 28cd867d40be2..8c733a2afa032 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2087,10 +2087,11 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt( // FIXME: Artifact combiner probably should have replaced the truncated // constant before this, so we shouldn't need // getConstantVRegValWithLookThrough. - Optional IdxVal = getConstantVRegValWithLookThrough( - MI.getOperand(2).getReg(), MRI); - if (!IdxVal) // Dynamic case will be selected to register indexing. + Optional MaybeIdxVal = + getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + if (!MaybeIdxVal) // Dynamic case will be selected to register indexing. return true; + const int64_t IdxVal = MaybeIdxVal->Value.getSExtValue(); Register Dst = MI.getOperand(0).getReg(); Register Vec = MI.getOperand(1).getReg(); @@ -2099,8 +2100,8 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Dst)); - if (IdxVal->Value < VecTy.getNumElements()) - B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits()); + if (IdxVal < VecTy.getNumElements()) + B.buildExtract(Dst, Vec, IdxVal * EltTy.getSizeInBits()); else B.buildUndef(Dst); @@ -2118,11 +2119,12 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( // FIXME: Artifact combiner probably should have replaced the truncated // constant before this, so we shouldn't need // getConstantVRegValWithLookThrough. - Optional IdxVal = getConstantVRegValWithLookThrough( - MI.getOperand(3).getReg(), MRI); - if (!IdxVal) // Dynamic case will be selected to register indexing. + Optional MaybeIdxVal = + getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI); + if (!MaybeIdxVal) // Dynamic case will be selected to register indexing. 
return true; + int64_t IdxVal = MaybeIdxVal->Value.getSExtValue(); Register Dst = MI.getOperand(0).getReg(); Register Vec = MI.getOperand(1).getReg(); Register Ins = MI.getOperand(2).getReg(); @@ -2131,8 +2133,8 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Ins)); - if (IdxVal->Value < VecTy.getNumElements()) - B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits()); + if (IdxVal < VecTy.getNumElements()) + B.buildInsert(Dst, Vec, Ins, IdxVal * EltTy.getSizeInBits()); else B.buildUndef(Dst); @@ -2643,7 +2645,7 @@ bool AMDGPULegalizerInfo::legalizeBuildVector( static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::G_XOR) return false; - auto ConstVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI); + auto ConstVal = getConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI); return ConstVal && *ConstVal == -1; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 6168b50f9cefd..54124d7b5a6eb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1331,7 +1331,7 @@ static unsigned setBufferOffsets(MachineIRBuilder &B, const LLT S32 = LLT::scalar(32); MachineRegisterInfo *MRI = B.getMRI(); - if (Optional Imm = getConstantVRegVal(CombinedOffset, *MRI)) { + if (Optional Imm = getConstantVRegSExtVal(CombinedOffset, *MRI)) { uint32_t SOffset, ImmOffset; if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget, Alignment)) { diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp index 5b31b8e09c5c9..630df2715b899 100644 --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -479,7 +479,7 @@ static void X86SelectAddress(const MachineInstr &I, "unsupported type."); if (I.getOpcode() == TargetOpcode::G_PTR_ADD) { - if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) { + if (auto COff = getConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) { int64_t Imm = *COff; if (isInt<32>(Imm)) { // Check for displacement overflow. AM.Disp = static_cast(Imm); From e6fde1ae7df0b019392352b61d898c9b83b775fa Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 22 Dec 2020 19:58:54 -0800 Subject: [PATCH 152/378] [MemorySSA] Use is_contained (NFC) --- llvm/lib/Analysis/MemorySSA.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index 00b4d42892c9b..c9ad9ffab16e7 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -2017,8 +2017,7 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const { "Incomplete MemoryPhi Node"); for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { verifyUseInDefs(Phi->getIncomingValue(I), Phi); - assert(find(predecessors(&B), Phi->getIncomingBlock(I)) != - pred_end(&B) && + assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) && "Incoming phi block not a block predecessor"); } #endif From efe7f5ede0b3276f3f43daca46410bb7978221fb Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 22 Dec 2020 20:06:12 -0800 Subject: [PATCH 153/378] [WebAssembly][NFC] Refactor SIMD load/store tablegen defs Introduce `Vec` records, each bundling all information related to a single SIMD lane interpretation. 
This lets TableGen definitions take a single Vec parameter from which they can extract information rather than taking multiple redundant parameters. This commit refactors all of the SIMD load and store instruction definitions to use the new `Vec`s. Subsequent commits will similarly refactor additional instruction definitions. Differential Revision: https://reviews.llvm.org/D93660 --- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 32 +- .../WebAssembly/WebAssemblyInstrSIMD.td | 290 +++++++++++------- 2 files changed, 190 insertions(+), 132 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 4bc77aa68668b..6c819f396ddc1 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -195,8 +195,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64) WASM_LOAD_STORE(LOAD8_SPLAT) - WASM_LOAD_STORE(LOAD_LANE_v16i8) - WASM_LOAD_STORE(STORE_LANE_v16i8) + WASM_LOAD_STORE(LOAD_LANE_I8x16) + WASM_LOAD_STORE(STORE_LANE_I8x16) return 0; WASM_LOAD_STORE(LOAD16_S_I32) WASM_LOAD_STORE(LOAD16_U_I32) @@ -223,8 +223,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64) WASM_LOAD_STORE(LOAD16_SPLAT) - WASM_LOAD_STORE(LOAD_LANE_v8i16) - WASM_LOAD_STORE(STORE_LANE_v8i16) + WASM_LOAD_STORE(LOAD_LANE_I16x8) + WASM_LOAD_STORE(STORE_LANE_I16x8) return 1; WASM_LOAD_STORE(LOAD_I32) WASM_LOAD_STORE(LOAD_F32) @@ -254,9 +254,9 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(MEMORY_ATOMIC_NOTIFY) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT32) WASM_LOAD_STORE(LOAD32_SPLAT) - WASM_LOAD_STORE(LOAD_ZERO_v4i32) - WASM_LOAD_STORE(LOAD_LANE_v4i32) - WASM_LOAD_STORE(STORE_LANE_v4i32) + WASM_LOAD_STORE(LOAD_ZERO_I32x4) + WASM_LOAD_STORE(LOAD_LANE_I32x4) + WASM_LOAD_STORE(STORE_LANE_I32x4) return 2; WASM_LOAD_STORE(LOAD_I64) WASM_LOAD_STORE(LOAD_F64) @@ -273,15 +273,15 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT64) WASM_LOAD_STORE(LOAD64_SPLAT) - WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16) - WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16) - WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32) - WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32) - WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64) - WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64) - WASM_LOAD_STORE(LOAD_ZERO_v2i64) - WASM_LOAD_STORE(LOAD_LANE_v2i64) - WASM_LOAD_STORE(STORE_LANE_v2i64) + WASM_LOAD_STORE(LOAD_EXTEND_S_I16x8) + WASM_LOAD_STORE(LOAD_EXTEND_U_I16x8) + WASM_LOAD_STORE(LOAD_EXTEND_S_I32x4) + WASM_LOAD_STORE(LOAD_EXTEND_U_I32x4) + WASM_LOAD_STORE(LOAD_EXTEND_S_I64x2) + WASM_LOAD_STORE(LOAD_EXTEND_U_I64x2) + WASM_LOAD_STORE(LOAD_ZERO_I64x2) + WASM_LOAD_STORE(LOAD_LANE_I64x2) + WASM_LOAD_STORE(STORE_LANE_I64x2) return 3; WASM_LOAD_STORE(LOAD_V128) WASM_LOAD_STORE(STORE_V128) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index e48bbaebd47e9..df4de49ee4c8f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -37,6 +37,75 @@ def ImmI#SIZE : ImmLeaf; +class Vec { + ValueType vt; + ValueType lane_vt; + WebAssemblyRegClass lane_rc; + int lane_bits; + ImmLeaf lane_idx; + string prefix; + Vec split; +} + +def I8x16 
: Vec { + let vt = v16i8; + let lane_vt = i32; + let lane_rc = I32; + let lane_bits = 8; + let lane_idx = LaneIdx16; + let prefix = "i8x16"; +} + +def I16x8 : Vec { + let vt = v8i16; + let lane_vt = i32; + let lane_rc = I32; + let lane_bits = 16; + let lane_idx = LaneIdx8; + let prefix = "i16x8"; + let split = I8x16; +} + +def I32x4 : Vec { + let vt = v4i32; + let lane_vt = i32; + let lane_rc = I32; + let lane_bits = 32; + let lane_idx = LaneIdx4; + let prefix = "i32x4"; + let split = I16x8; +} + +def I64x2 : Vec { + let vt = v2i64; + let lane_vt = i64; + let lane_rc = I64; + let lane_bits = 64; + let lane_idx = LaneIdx2; + let prefix = "i64x2"; + let split = I32x4; +} + +def F32x4 : Vec { + let vt = v4f32; + let lane_vt = f32; + let lane_rc = F32; + let lane_bits = 32; + let lane_idx = LaneIdx4; + let prefix = "f32x4"; +} + +def F64x2 : Vec { + let vt = v2f64; + let lane_vt = f64; + let lane_rc = F64; + let lane_bits = 64; + let lane_idx = LaneIdx2; + let prefix = "f64x2"; +} + +defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; + //===----------------------------------------------------------------------===// // Load and store //===----------------------------------------------------------------------===// @@ -56,12 +125,12 @@ defm LOAD_V128_A64 : } // Def load patterns from WebAssemblyInstrMemory.td for vector types -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -defm : LoadPatNoOffset; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatOffsetOnly; -defm : LoadPatGlobalAddrOffOnly; +foreach vec = AllVecs in { +defm : LoadPatNoOffset; +defm : LoadPatImmOff; +defm : LoadPatImmOff; +defm : LoadPatOffsetOnly; +defm : LoadPatGlobalAddrOffOnly; } // v128.loadX_splat @@ -94,87 +163,75 @@ def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; -foreach args = [["v16i8", "8"], ["v8i16", "16"], ["v4i32", "32"], - ["v2i64", "64"], ["v4f32", "32"], ["v2f64", "64"]] in { -defm : LoadPatNoOffset(args[0]), - load_splat, - "LOAD"#args[1]#"_SPLAT">; -defm : LoadPatImmOff(args[0]), - load_splat, - regPlusImm, - "LOAD"#args[1]#"_SPLAT">; -defm : LoadPatImmOff(args[0]), - load_splat, - or_is_add, - "LOAD"#args[1]#"_SPLAT">; -defm : LoadPatOffsetOnly(args[0]), - load_splat, - "LOAD"#args[1]#"_SPLAT">; -defm : LoadPatGlobalAddrOffOnly(args[0]), - load_splat, - "LOAD"#args[1]#"_SPLAT">; +foreach vec = AllVecs in { +defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; +defm : LoadPatNoOffset; +defm : LoadPatImmOff; +defm : LoadPatImmOff; +defm : LoadPatOffsetOnly; +defm : LoadPatGlobalAddrOffOnly; } // Load and extend -multiclass SIMDLoadExtend simdop> { +multiclass SIMDLoadExtend simdop> { + defvar signed = vec.prefix#".load"#loadPat#"_s"; + defvar unsigned = vec.prefix#".load"#loadPat#"_u"; let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_EXTEND_S_#vec_t#_A32 : + defm LOAD_EXTEND_S_#vec#_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - name#"_s\t$dst, ${off}(${addr})$p2align", - name#"_s\t$off$p2align", simdop>; - defm LOAD_EXTEND_U_#vec_t#_A32 : + signed#"\t$dst, ${off}(${addr})$p2align", + signed#"\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec#_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - name#"_u\t$dst, ${off}(${addr})$p2align", - 
name#"_u\t$off$p2align", !add(simdop, 1)>; - defm LOAD_EXTEND_S_#vec_t#_A64 : + unsigned#"\t$dst, ${off}(${addr})$p2align", + unsigned#"\t$off$p2align", !add(simdop, 1)>; + defm LOAD_EXTEND_S_#vec#_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - name#"_s\t$dst, ${off}(${addr})$p2align", - name#"_s\t$off$p2align", simdop>; - defm LOAD_EXTEND_U_#vec_t#_A64 : + signed#"\t$dst, ${off}(${addr})$p2align", + signed#"\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec#_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - name#"_u\t$dst, ${off}(${addr})$p2align", - name#"_u\t$off$p2align", !add(simdop, 1)>; + unsigned#"\t$dst, ${off}(${addr})$p2align", + unsigned#"\t$off$p2align", !add(simdop, 1)>; } } -defm "" : SIMDLoadExtend; -defm "" : SIMDLoadExtend; -defm "" : SIMDLoadExtend; - -foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in -foreach exts = [["sextloadv", "_S"], - ["zextloadv", "_U"], - ["extloadv", "_U"]] in { -defm : LoadPatNoOffset(exts[0]#types[1]), - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatImmOff(exts[0]#types[1]), regPlusImm, - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatImmOff(exts[0]#types[1]), or_is_add, - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatOffsetOnly(exts[0]#types[1]), - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatGlobalAddrOffOnly(exts[0]#types[1]), - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; + +foreach vec = [I16x8, I32x4, I64x2] in +foreach exts = [["sextloadvi", "_S"], + ["zextloadvi", "_U"], + ["extloadvi", "_U"]] in { +defvar loadpat = !cast(exts[0]#vec.split.lane_bits); +defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec; +defm : LoadPatNoOffset; +defm : LoadPatImmOff; +defm : LoadPatImmOff; +defm : LoadPatOffsetOnly; +defm : LoadPatGlobalAddrOffOnly; } // Load lane into zero vector -multiclass SIMDLoadZero simdop> { +multiclass SIMDLoadZero simdop> { + defvar name = "v128.load"#vec.lane_bits#"_zero"; let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_ZERO_#vec_t#_A32 : + defm LOAD_ZERO_#vec#_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], name#"\t$dst, ${off}(${addr})$p2align", name#"\t$off$p2align", simdop>; - defm LOAD_ZERO_#vec_t#_A64 : + defm LOAD_ZERO_#vec#_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], @@ -185,35 +242,31 @@ multiclass SIMDLoadZero simdop> { // TODO: Also support v4f32 and v2f64 once the instructions are merged // to the proposal -defm "" : SIMDLoadZero; -defm "" : SIMDLoadZero; - -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; - -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; - -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; +defm "" : SIMDLoadZero; +defm "" : SIMDLoadZero; + +foreach vec = [I32x4, I64x2] in { +defvar loadpat = !cast("int_wasm_load"#vec.lane_bits#"_zero"); +defvar inst = "LOAD_ZERO_"#vec; +defm : LoadPatNoOffset; +defm : LoadPatImmOff; +defm : LoadPatImmOff; +defm : LoadPatOffsetOnly; +defm : LoadPatGlobalAddrOffOnly; +} // Load lane -multiclass SIMDLoadLane simdop> { +multiclass SIMDLoadLane simdop> { + defvar name = 
"v128.load"#vec.lane_bits#"_lane"; let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_LANE_#vec_t#_A32 : + defm LOAD_LANE_#vec#_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx, I32:$addr, V128:$vec), (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx), [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx", name#"\t$off$p2align, $idx", simdop>; - defm LOAD_LANE_#vec_t#_A64 : + defm LOAD_LANE_#vec#_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx, I64:$addr, V128:$vec), @@ -225,25 +278,29 @@ multiclass SIMDLoadLane simdop> { // TODO: Also support v4f32 and v2f64 once the instructions are merged // to the proposal -defm "" : SIMDLoadLane; -defm "" : SIMDLoadLane; -defm "" : SIMDLoadLane; -defm "" : SIMDLoadLane; +defm "" : SIMDLoadLane; +defm "" : SIMDLoadLane; +defm "" : SIMDLoadLane; +defm "" : SIMDLoadLane; // Select loads with no constant offset. -multiclass LoadLanePatNoOffset { - def : Pat<(ty (kind (i32 I32:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx))), - (!cast("LOAD_LANE_"#ty#"_A32") 0, 0, imm:$idx, I32:$addr, V128:$vec)>, +multiclass LoadLanePatNoOffset { + defvar load_lane_a32 = !cast("LOAD_LANE_"#vec#"_A32"); + defvar load_lane_a64 = !cast("LOAD_LANE_"#vec#"_A64"); + def : Pat<(vec.vt (kind (i32 I32:$addr), + (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))), + (load_lane_a32 0, 0, imm:$idx, I32:$addr, V128:$vec)>, Requires<[HasAddr32]>; - def : Pat<(ty (kind (i64 I64:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx))), - (!cast("LOAD_LANE_"#ty#"_A64") 0, 0, imm:$idx, I64:$addr, V128:$vec)>, + def : Pat<(vec.vt (kind (i64 I64:$addr), + (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))), + (load_lane_a64 0, 0, imm:$idx, I64:$addr, V128:$vec)>, Requires<[HasAddr64]>; } -defm : LoadLanePatNoOffset; -defm : LoadLanePatNoOffset; -defm : LoadLanePatNoOffset; -defm : LoadLanePatNoOffset; +defm : LoadLanePatNoOffset; +defm : LoadLanePatNoOffset; +defm : LoadLanePatNoOffset; +defm : LoadLanePatNoOffset; // TODO: Also support the other load patterns for load_lane once the instructions // are merged to the proposal. 
@@ -263,25 +320,26 @@ defm STORE_V128_A64 : } // Def store patterns from WebAssemblyInstrMemory.td for vector types -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -defm : StorePatNoOffset; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatOffsetOnly; -defm : StorePatGlobalAddrOffOnly; +foreach vec = AllVecs in { +defm : StorePatNoOffset; +defm : StorePatImmOff; +defm : StorePatImmOff; +defm : StorePatOffsetOnly; +defm : StorePatGlobalAddrOffOnly; } // Store lane -multiclass SIMDStoreLane simdop> { +multiclass SIMDStoreLane simdop> { + defvar name = "v128.store"#vec.lane_bits#"_lane"; let mayStore = 1, UseNamedOperandTable = 1 in { - defm STORE_LANE_#vec_t#_A32 : + defm STORE_LANE_#vec#_A32 : SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx, I32:$addr, V128:$vec), (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx), [], name#"\t${off}(${addr})$p2align, $vec, $idx", name#"\t$off$p2align, $idx", simdop>; - defm STORE_LANE_#vec_t#_A64 : + defm STORE_LANE_#vec#_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx, I64:$addr, V128:$vec), @@ -293,27 +351,27 @@ multiclass SIMDStoreLane simdop> { // TODO: Also support v4f32 and v2f64 once the instructions are merged // to the proposal -defm "" : SIMDStoreLane; -defm "" : SIMDStoreLane; -defm "" : SIMDStoreLane; -defm "" : SIMDStoreLane; +defm "" : SIMDStoreLane; +defm "" : SIMDStoreLane; +defm "" : SIMDStoreLane; +defm "" : SIMDStoreLane; // Select stores with no constant offset. -multiclass StoreLanePatNoOffset { - def : Pat<(kind (i32 I32:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx)), - (!cast("STORE_LANE_"#ty#"_A32") - 0, 0, imm:$idx, I32:$addr, ty:$vec)>, +multiclass StoreLanePatNoOffset { + def : Pat<(kind (i32 I32:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)), + (!cast("STORE_LANE_"#vec#"_A32") + 0, 0, imm:$idx, I32:$addr, vec.vt:$vec)>, Requires<[HasAddr32]>; - def : Pat<(kind (i64 I64:$addr), (ty V128:$vec), (i32 lane_imm_t:$idx)), - (!cast("STORE_LANE_"#ty#"_A64") - 0, 0, imm:$idx, I64:$addr, ty:$vec)>, + def : Pat<(kind (i64 I64:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)), + (!cast("STORE_LANE_"#vec#"_A64") + 0, 0, imm:$idx, I64:$addr, vec.vt:$vec)>, Requires<[HasAddr64]>; } -defm : StoreLanePatNoOffset; -defm : StoreLanePatNoOffset; -defm : StoreLanePatNoOffset; -defm : StoreLanePatNoOffset; +defm : StoreLanePatNoOffset; +defm : StoreLanePatNoOffset; +defm : StoreLanePatNoOffset; +defm : StoreLanePatNoOffset; // TODO: Also support the other store patterns for store_lane once the // instructions are merged to the proposal. From 3c707d73f26f4189a21e610210d01c0059f4fd01 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 22 Dec 2020 20:13:27 -0800 Subject: [PATCH 154/378] [NewGVN] Remove for_each_found (NFC) The last use of the function was removed on Sep 30, 2017 in commit 9b926e90d33e0f71c16618365333fc7b330b6bb5. 
--- llvm/lib/Transforms/Scalar/NewGVN.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 958014fcb1ed9..ee26c299c88c6 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -802,8 +802,6 @@ class NewGVN { const BasicBlock *) const; // Various instruction touch utilities - template - void for_each_found(Map &, const KeyType &, Func); template void touchAndErase(Map &, const KeyType &); void markUsersTouched(Value *); @@ -1991,16 +1989,6 @@ NewGVN::performSymbolicEvaluation(Value *V, return E; } -// Look up a container in a map, and then call a function for each thing in the -// found container. -template -void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) { - const auto Result = M.find_as(Key); - if (Result != M.end()) - for (typename Map::mapped_type::value_type Mapped : Result->second) - F(Mapped); -} - // Look up a container of values/instructions in a map, and touch all the // instructions in the container. Then erase value from the map. template From 0219cf7dfafa45ef82fcd92f48e4b614da866d51 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 22 Dec 2020 21:40:43 -0800 Subject: [PATCH 155/378] [NewPM] Fix objc-arc-apelim pass typo --- llvm/lib/Passes/PassRegistry.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 977bd46b06337..b00a7bd14b4fd 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -81,7 +81,7 @@ MODULE_PASS("mergefunc", MergeFunctionsPass()) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("objc-arc", ObjCARCOptPass()) -MODULE_PASS("objc-arc-apelim", ObjCARCOptPass()) +MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) MODULE_PASS("objc-arc-contract", ObjCARCContractPass()) MODULE_PASS("partial-inliner", PartialInlinerPass()) MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion()) From 4d479443934aedb9d93725ca54bdbed52ed14baf Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Wed, 23 Dec 2020 00:27:38 -0600 Subject: [PATCH 156/378] [RISCV] Define the vfmin, vfmax RVV intrinsics Define the vfmin, vfmax IR intrinsics for the respective V instructions. 
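As exercised by the tests added below, the unmasked form takes two scalable-vector
operands plus an explicit vector-length operand. A minimal sketch for the nxv1f16
variant on rv32 (the wrapper function and value names here are illustrative, not
part of the patch):

  declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
    <vscale x 1 x half>, <vscale x 1 x half>, i32)

  define <vscale x 1 x half> @example_vfmax(<vscale x 1 x half> %x,
                                            <vscale x 1 x half> %y, i32 %vl) {
    %r = call <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
                <vscale x 1 x half> %x, <vscale x 1 x half> %y, i32 %vl)
    ret <vscale x 1 x half> %r
  }

The masked variants tested below additionally take a maskedoff (passthru) operand
and a <vscale x N x i1> mask ahead of the vector length, matching the five-operand
llvm.riscv.vfmax.mask.* declarations in the tests.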
Authored-by: Roger Ferrer Ibanez Co-Authored-by: Evandro Menezes Differential Revision: https://reviews.llvm.org/D93673 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 3 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 12 + llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll | 881 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll | 1201 +++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll | 881 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll | 1201 +++++++++++++++++ 6 files changed, 4179 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index dc080db5b30f5..4e075a8f2db76 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -560,6 +560,9 @@ let TargetPrefix = "riscv" in { defm vfmsub : RISCVTernaryAAXA; defm vfnmsub : RISCVTernaryAAXA; + defm vfmin : RISCVBinaryAAX; + defm vfmax : RISCVBinaryAAX; + defm vfsgnj : RISCVBinaryAAX; defm vfsgnjn : RISCVBinaryAAX; defm vfsgnjx : RISCVBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 5c858b0e09825..da3a9242acda3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1953,6 +1953,12 @@ defm PseudoVFNMADD : VPseudoTernaryV_VV_VX_AAXA; defm PseudoVFMSUB : VPseudoTernaryV_VV_VX_AAXA; defm PseudoVFNMSUB : VPseudoTernaryV_VV_VX_AAXA; +//===----------------------------------------------------------------------===// +// 14.9. Vector Floating-Point Min/Max Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFMIN : VPseudoBinaryV_VV_VX; +defm PseudoVFMAX : VPseudoBinaryV_VV_VX; + //===----------------------------------------------------------------------===// // 14.12. Vector Floating-Point Sign-Injection Instructions //===----------------------------------------------------------------------===// @@ -2346,6 +2352,12 @@ defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloat defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>; defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>; +//===----------------------------------------------------------------------===// +// 14.9. Vector Floating-Point Min/Max Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors>; + //===----------------------------------------------------------------------===// // 14.12. 
Vector Floating-Point Sign-Injection Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll new file mode 100644 index 0000000000000..5c7b025f4f9ce --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll @@ -0,0 +1,881 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f16.nxv1f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f16.nxv2f16( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f16.nxv2f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f16.nxv4f16( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f16.nxv4f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f16.nxv8f16( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f16.nxv8f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16( + , + , 
+ , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f16.nxv16f16( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f16.nxv16f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv32f16.nxv32f16( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv32f16.nxv32f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f32.nxv1f32( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f32.nxv1f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f32.nxv2f32( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f32.nxv2f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define 
@intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f32.nxv4f32( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f32.nxv4f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f32.nxv8f32( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f32.nxv8f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f32.nxv16f32( + , + , + i32); + +define @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f32.nxv16f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( + , + , + , + , + i32); + +define @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16 +; 
CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv32f16.f16( + , + half, + i32); + +define @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv32f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv32f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: 
vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f32.f32( + , + float, + i32); + +define @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll new file mode 100644 index 0000000000000..c24eccdb84a81 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll @@ -0,0 +1,1201 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f16.nxv1f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f16.nxv2f16( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f16.nxv2f16( + %0, + %1, + i64 %2) + + ret 
%a +} + +declare @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f16.nxv4f16( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f16.nxv4f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f16.nxv8f16( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f16.nxv8f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f16.nxv16f16( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f16.nxv16f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv32f16.nxv32f16( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv32f16.nxv32f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( + , + , + , + , + i64); 
+ +define @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f32.nxv1f32( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f32.nxv1f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f32.nxv2f32( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f32.nxv2f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f32.nxv4f32( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f32.nxv4f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f32.nxv8f32( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f32.nxv8f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: 
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f32.nxv16f32( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f32.nxv16f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f64.nxv1f64( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f64.nxv1f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f64.nxv2f64( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f64.nxv2f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f64.nxv4f64( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f64.nxv4f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, 
{{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f64.nxv8f64( + , + , + i64); + +define @intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f64.nxv8f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64( + , + , + , + , + i64); + +define @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmax.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a 
= call @llvm.riscv.vfmax.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv32f16.f16( + , + half, + i64); + +define @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv32f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv32f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f32.f32( + %0, + %1, + 
float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f32.f32( + , + float, + i64); + +define @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv16f32.f32( + , + float, + i64); + +define @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv16f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv16f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare 
@llvm.riscv.vfmax.nxv1f64.f64( + , + double, + i64); + +define @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv8f64.f64( + , + double, + i64); + +define @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmax.nxv8f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmax.mask.nxv8f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmax.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmax.mask.nxv8f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll new file mode 100644 index 0000000000000..ea6f019ebd61f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll @@ -0,0 +1,881 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f16.nxv1f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f16.nxv2f16( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f16.nxv2f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f16.nxv4f16( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f16.nxv4f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f16.nxv8f16( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f16.nxv8f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f16.nxv16f16( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f16.nxv16f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv32f16.nxv32f16( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv32f16.nxv32f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f32.nxv1f32( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f32.nxv1f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f32.nxv2f32( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f32.nxv2f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli 
{{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f32.nxv4f32( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f32.nxv4f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f32.nxv8f32( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f32.nxv8f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f32.nxv16f32( + , + , + i32); + +define @intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f32.nxv16f32( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32( + , + , + , + , + i32); + +define @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f16.f16( + , + half, + i32); + +define @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, 
{{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f16.f16( + , + half, + i32); + +define @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f16.f16( + , + half, + i32); + +define @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f16.f16( + , + half, + i32); + +define @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f16.f16( + , + half, + i32); + +define @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f16.f16( + %0, 
+ %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv32f16.f16( + , + half, + i32); + +define @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv32f16.f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv32f16.f16( + , + , + half, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f32.f32( + , + float, + i32); + +define @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f32.f32( + , + float, + i32); + +define @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f32.f32( + , + float, + i32); + +define @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare 
@llvm.riscv.vfmin.nxv8f32.f32( + , + float, + i32); + +define @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f32.f32( + , + float, + i32); + +define @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f32.f32( + %0, + float %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f32.f32( + , + , + float, + , + i32); + +define @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll new file mode 100644 index 0000000000000..815badcfc75a5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll @@ -0,0 +1,1201 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f16.nxv1f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f16.nxv2f16( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f16.nxv2f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) 
nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f16.nxv4f16( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f16.nxv4f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f16.nxv8f16( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f16.nxv8f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f16.nxv16f16( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f16.nxv16f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv32f16.nxv32f16( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv32f16.nxv32f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f32.nxv1f32( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f32.nxv1f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f32.nxv2f32( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f32.nxv2f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f32.nxv4f32( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f32.nxv4f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f32.nxv8f32( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f32.nxv8f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; 
CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f32.nxv16f32( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f32.nxv16f32( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f64.nxv1f64( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f64.nxv1f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f64.nxv2f64( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f64.nxv2f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f64.nxv4f64( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f64.nxv4f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call 
@llvm.riscv.vfmin.mask.nxv4f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f64.nxv8f64( + , + , + i64); + +define @intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f64.nxv8f64( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64( + , + , + , + , + i64); + +define @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmin.vv {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f16.f16( + , + half, + i64); + +define @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f16.f16( + , + half, + i64); + +define @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f16.f16( + , + half, + i64); + +define @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + 
+declare @llvm.riscv.vfmin.nxv8f16.f16( + , + half, + i64); + +define @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f16.f16( + , + half, + i64); + +define @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv32f16.f16( + , + half, + i64); + +define @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv32f16.f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv32f16.f16( + , + , + half, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv32f16.f16( + %0, + %1, + half %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f32.f32( + , + float, + i64); + +define @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f32.f32( + , + float, + 
i64); + +define @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f32.f32( + , + float, + i64); + +define @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f32.f32( + , + float, + i64); + +define @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv16f32.f32( + , + float, + i64); + +define @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv16f32.f32( + %0, + float %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv16f32.f32( + , + , + float, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv16f32.f32( + %0, + %1, + float %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f64.f64( + , + double, + i64); + +define 
@intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv1f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv1f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv1f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv2f64.f64( + , + double, + i64); + +define @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv2f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv2f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv2f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv4f64.f64( + , + double, + i64); + +define @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv4f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv4f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv4f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv8f64.f64( + , + double, + i64); + +define @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}} + %a = call @llvm.riscv.vfmin.nxv8f64.f64( + %0, + double %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmin.mask.nxv8f64.f64( + , + , + double, + , + i64); + +define @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8,ta,mu +; CHECK: vfmin.vf {{v[0-9]+}}, {{v[0-9]+}}, {{(a|ft)[0-9]+}}, v0.t + %a = call @llvm.riscv.vfmin.mask.nxv8f64.f64( + %0, + %1, + double %2, + %3, + i64 %4) + + ret %a +} From 032600b9aef9dafe62fda6e880b5c1b0a87e4364 Mon Sep 17 00:00:00 2001 From: Zakk Chen Date: Mon, 21 Dec 2020 
20:50:58 -0800 Subject: [PATCH 157/378] [RISCV] Define vmerge/vfmerge intrinsics. Define vmerge/vfmerge intrinsics and lower to V instructions. Include support for vector-vector vfmerge by vmerge.vvm. We work with @rogfer01 from BSC to come out this patch. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D93674 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 5 +- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 50 +- llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll | 441 ++++++ llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll | 601 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll | 973 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll | 1189 +++++++++++++++++ 6 files changed, 3248 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 4e075a8f2db76..0dcc9e5b1d6c9 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -518,6 +518,8 @@ let TargetPrefix = "riscv" in { defm vssubu : RISCVSaturatingBinaryAAX; defm vssub : RISCVSaturatingBinaryAAX; + def int_riscv_vmerge : RISCVBinaryWithV0; + def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; @@ -526,7 +528,6 @@ let TargetPrefix = "riscv" in { [IntrNoMem]>, RISCVVIntrinsic { let ExtendOperand = 1; } - def int_riscv_vmv_x_s : Intrinsic<[LLVMVectorElementType<0>], [llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; @@ -567,6 +568,8 @@ let TargetPrefix = "riscv" in { defm vfsgnjn : RISCVBinaryAAX; defm vfsgnjx : RISCVBinaryAAX; + defm vfmerge : RISCVBinaryWithV0; + defm vslideup : RISCVTernaryAAAX; defm vslidedown : RISCVTernaryAAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index da3a9242acda3..1540ea403b926 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -749,13 +749,14 @@ multiclass VPseudoBinaryV_VM { + string Constraint = "", bit IsFloat = 0> { foreach m = MxList.m in - def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : + def !if(IsFloat, "_VF", "_VX") # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarryIn.R, m.vrclass)), - m.vrclass, GPR, m, CarryIn, Constraint>; + m.vrclass, !if(IsFloat, FPR32, GPR), + m, CarryIn, Constraint>; } multiclass VPseudoBinaryV_IM { - foreach vti = AllIntegerVectors in + bit CarryOut = 0, + list vtilist = AllIntegerVectors> { + foreach vti = vtilist in defm : VPatBinaryCarryIn { - foreach vti = AllIntegerVectors in - defm : VPatBinaryCarryIn vtilist = AllIntegerVectors> { + foreach vti = vtilist in + defm : VPatBinaryCarryIn; + vti.RegClass, vti.ScalarRegClass>; } multiclass VPatBinaryV_IM; defm PseudoVMFLE : VPseudoBinaryM_VV_VX; defm PseudoVMFGT : VPseudoBinaryM_VX; defm PseudoVMFGE : VPseudoBinaryM_VX; + +//===----------------------------------------------------------------------===// +// 14.15. 
Vector Floating-Point Merge Instruction +//===----------------------------------------------------------------------===// +defm PseudoVFMERGE : VPseudoBinaryV_XM; + } // Predicates = [HasStdExtV, HasStdExtF] //===----------------------------------------------------------------------===// @@ -2256,6 +2272,11 @@ defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmacc", "PseudoVWMACC", AllWidenableInt defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmaccsu", "PseudoVWMACCSU", AllWidenableIntVectors>; defm "" : VPatTernaryW_VX<"int_riscv_vwmaccus", "PseudoVWMACCUS", AllWidenableIntVectors>; +//===----------------------------------------------------------------------===// +// 12.15. Vector Integer Merge Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">; + //===----------------------------------------------------------------------===// // 12.17. Vector Integer Move Instructions //===----------------------------------------------------------------------===// @@ -2375,6 +2396,15 @@ defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE", AllFloatVectors>; defm "" : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT", AllFloatVectors>; defm "" : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE", AllFloatVectors>; +//===----------------------------------------------------------------------===// +// 14.15. Vector Floating-Point Merge Instruction +//===----------------------------------------------------------------------===// +// We can use vmerge.vvm to support vector-vector vfmerge. +defm "" : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE", + /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; +defm "" : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE", + /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; + } // Predicates = [HasStdExtV, HasStdExtF] //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll new file mode 100644 index 0000000000000..a6b09704c8a67 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll @@ -0,0 +1,441 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16( %0, half %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call 
@llvm.riscv.vfmerge.nxv2f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16( %0, half %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16( %0, half %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16( %0, half %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16( %0, half %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv32f16.nxv32f16( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv32f16.nxv32f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare 
@llvm.riscv.vfmerge.nxv32f16.f16( + , + half, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16( %0, half %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv32f16.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32( %0, float %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32( %0, float %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32( %0, float %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32( %0, float 
%1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f32.nxv16f32( + , + , + , + i32); + +define @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f32.nxv16f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f32.f32( + , + float, + , + i32); + +define @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32( %0, float %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f32.f32( + %0, + float %1, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll new file mode 100644 index 0000000000000..6f3cafce40e66 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll @@ -0,0 +1,601 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16( %0, half %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16( %0, half %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: 
vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16( %0, half %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16( %0, half %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16( %0, half %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv32f16.nxv32f16( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv32f16.nxv32f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv32f16.f16( + , + half, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16( %0, half %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv32f16.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f32.nxv1f32( + 
%0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32( %0, float %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32( %0, float %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32( %0, float %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f32.f32( + , + float, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32( %0, float %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f32.nxv16f32( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f32.nxv16f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv16f32.f32( + , + float, + , + i64); + 
+define @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32( %0, float %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv16f32.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv1f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64( %0, double %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv1f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv2f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64( %0, double %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv2f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv4f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64( %0, double %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv4f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f64.nxv8f64( + , + , + , + i64); + +define @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f64.nxv8f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfmerge.nxv8f64.f64( + , + double, + , + i64); + +define @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64( %0, double %1, %2, i64 %3) nounwind { +entry: +; 
CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8 +; CHECK: vfmerge.vfm {{v[0-9]+}}, {{v[0-9]+}}, {{ft[0-9]+}}, v0 + %a = call @llvm.riscv.vfmerge.nxv8f64.f64( + %0, + double %1, + %2, + i64 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll new file mode 100644 index 0000000000000..320925f178d07 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll @@ -0,0 +1,973 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmerge.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i8.nxv2i8( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i8.nxv2i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i8.nxv4i8( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i8.nxv4i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i8.nxv8i8( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i8.nxv8i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i8.nxv16i8( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i8.nxv16i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i8.nxv32i8( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i8.nxv32i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv64i8.nxv64i8( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv64i8.nxv64i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare 
@llvm.riscv.vmerge.nxv1i16.nxv1i16( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i16.nxv1i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i16.nxv2i16( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i16.nxv2i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i16.nxv4i16( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i16.nxv4i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i16.nxv8i16( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i16.nxv8i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i16.nxv16i16( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i16.nxv16i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i16.nxv32i16( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i16.nxv32i16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i32.nxv1i32( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i32.nxv1i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i32.nxv2i32( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i32.nxv4i32( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i32 %3) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i32.nxv4i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i32.nxv8i32( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i32.nxv8i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i32.nxv16i32( + , + , + , + i32); + +define @intrinsic_vmerge_vvm_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i32.nxv16i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmerge_vxm_nxv1i8_nxv1i8_i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmerge_vxm_nxv2i8_nxv2i8_i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmerge_vxm_nxv4i8_nxv4i8_i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmerge_vxm_nxv8i8_nxv8i8_i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmerge_vxm_nxv16i8_nxv16i8_i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmerge_vxm_nxv32i8_nxv32i8_i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + 
ret %a +} + +declare @llvm.riscv.vmerge.nxv64i8.i8( + , + i8, + , + i32); + +define @intrinsic_vmerge_vxm_nxv64i8_nxv64i8_i8( %0, i8 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv64i8.i8( + %0, + i8 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmerge_vxm_nxv1i16_nxv1i16_i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmerge_vxm_nxv2i16_nxv2i16_i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmerge_vxm_nxv4i16_nxv4i16_i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmerge_vxm_nxv8i16_nxv8i16_i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmerge_vxm_nxv16i16_nxv16i16_i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i16.i16( + , + i16, + , + i32); + +define @intrinsic_vmerge_vxm_nxv32i16_nxv32i16_i16( %0, i16 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i16.i16( + %0, + i16 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmerge_vxm_nxv1i32_nxv1i32_i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmerge_vxm_nxv2i32_nxv2i32_i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmerge_vxm_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmerge_vxm_nxv4i32_nxv4i32_i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmerge_vxm_nxv8i32_nxv8i32_i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i32.i32( + , + i32, + , + i32); + +define @intrinsic_vmerge_vxm_nxv16i32_nxv16i32_i32( %0, i32 %1, %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i32.i32( + %0, + i32 %1, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv1i8_nxv1i8_i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv1i8.i8( + %0, + i8 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv2i8_nxv2i8_i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv2i8.i8( + %0, + i8 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv4i8_nxv4i8_i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv4i8.i8( + %0, + i8 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv8i8_nxv8i8_i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv8i8.i8( + %0, + i8 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv16i8_nxv16i8_i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv16i8.i8( + %0, + i8 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv32i8_nxv32i8_i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv32i8.i8( + %0, + i8 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv64i8_nxv64i8_i8( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: 
intrinsic_vmerge_vim_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv64i8.i8( + %0, + i8 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv1i16_nxv1i16_i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv1i16.i16( + %0, + i16 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv2i16_nxv2i16_i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv2i16.i16( + %0, + i16 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv4i16_nxv4i16_i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv4i16.i16( + %0, + i16 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv8i16_nxv8i16_i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv8i16.i16( + %0, + i16 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv16i16_nxv16i16_i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv16i16.i16( + %0, + i16 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv32i16_nxv32i16_i16( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv32i16.i16( + %0, + i16 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv1i32_nxv1i32_i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv1i32.i32( + %0, + i32 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv2i32_nxv2i32_i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv2i32.i32( + %0, + i32 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv4i32_nxv4i32_i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv4i32.i32( + %0, + i32 9, + %1, + i32 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv8i32_nxv8i32_i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv8i32.i32( + %0, + i32 9, + %1, + i32 %2) 
+ + ret %a +} + +define @intrinsic_vmerge_vim_nxv16i32_nxv16i32_i32( %0, %1, i32 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv16i32.i32( + %0, + i32 9, + %1, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll new file mode 100644 index 0000000000000..c8a8f113bf4ef --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll @@ -0,0 +1,1189 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vmerge.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1i8_nxv1i8_nxv1i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i8.nxv2i8( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2i8_nxv2i8_nxv2i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i8.nxv2i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i8.nxv4i8( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4i8_nxv4i8_nxv4i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i8.nxv4i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i8.nxv8i8( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8i8_nxv8i8_nxv8i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i8.nxv8i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i8.nxv16i8( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16i8_nxv16i8_nxv16i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i8.nxv16i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i8.nxv32i8( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32i8_nxv32i8_nxv32i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i8.nxv32i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv64i8.nxv64i8( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv64i8_nxv64i8_nxv64i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call 
@llvm.riscv.vmerge.nxv64i8.nxv64i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i16.nxv1i16( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1i16_nxv1i16_nxv1i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i16.nxv1i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i16.nxv2i16( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2i16_nxv2i16_nxv2i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i16.nxv2i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i16.nxv4i16( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4i16_nxv4i16_nxv4i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i16.nxv4i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i16.nxv8i16( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8i16_nxv8i16_nxv8i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i16.nxv8i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i16.nxv16i16( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16i16_nxv16i16_nxv16i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i16.nxv16i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i16.nxv32i16( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32i16_nxv32i16_nxv32i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i16.nxv32i16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i32.nxv1i32( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1i32_nxv1i32_nxv1i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i32.nxv1i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i32.nxv2i32( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2i32_nxv2i32_nxv2i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i32.nxv4i32( + , + , + , + i64); + 
+define @intrinsic_vmerge_vvm_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4i32_nxv4i32_nxv4i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i32.nxv4i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i32.nxv8i32( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8i32_nxv8i32_nxv8i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i32.nxv8i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i32.nxv16i32( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16i32_nxv16i32_nxv16i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i32.nxv16i32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i64.nxv1i64( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1i64_nxv1i64_nxv1i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i64.nxv1i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i64.nxv2i64( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2i64_nxv2i64_nxv2i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i64.nxv2i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i64.nxv4i64( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4i64_nxv4i64_nxv4i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i64.nxv4i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i64.nxv8i64( + , + , + , + i64); + +define @intrinsic_vmerge_vvm_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8i64_nxv8i64_nxv8i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8 +; CHECK: vmerge.vvm {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i64.nxv8i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmerge_vxm_nxv1i8_nxv1i8_i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmerge_vxm_nxv2i8_nxv2i8_i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, 
e8,mf4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmerge_vxm_nxv4i8_nxv4i8_i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmerge_vxm_nxv8i8_nxv8i8_i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmerge_vxm_nxv16i8_nxv16i8_i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmerge_vxm_nxv32i8_nxv32i8_i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv64i8.i8( + , + i8, + , + i64); + +define @intrinsic_vmerge_vxm_nxv64i8_nxv64i8_i8( %0, i8 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv64i8.i8( + %0, + i8 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmerge_vxm_nxv1i16_nxv1i16_i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmerge_vxm_nxv2i16_nxv2i16_i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmerge_vxm_nxv4i16_nxv4i16_i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i16.i16( + , + i16, + , + i64); + +define 
@intrinsic_vmerge_vxm_nxv8i16_nxv8i16_i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmerge_vxm_nxv16i16_nxv16i16_i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv32i16.i16( + , + i16, + , + i64); + +define @intrinsic_vmerge_vxm_nxv32i16_nxv32i16_i16( %0, i16 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv32i16.i16( + %0, + i16 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmerge_vxm_nxv1i32_nxv1i32_i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmerge_vxm_nxv2i32_nxv2i32_i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmerge_vxm_nxv4i32_nxv4i32_i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmerge_vxm_nxv8i32_nxv8i32_i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv16i32.i32( + , + i32, + , + i64); + +define @intrinsic_vmerge_vxm_nxv16i32_nxv16i32_i32( %0, i32 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv16i32.i32( + %0, + i32 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv1i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmerge_vxm_nxv1i64_nxv1i64_i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1 +; 
CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv1i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv2i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmerge_vxm_nxv2i64_nxv2i64_i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv2i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv4i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmerge_vxm_nxv4i64_nxv4i64_i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv4i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.nxv8i64.i64( + , + i64, + , + i64); + +define @intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64( %0, i64 %1, %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8 +; CHECK: vmerge.vxm {{v[0-9]+}}, {{v[0-9]+}}, {{a[0-9]+}}, v0 + %a = call @llvm.riscv.vmerge.nxv8i64.i64( + %0, + i64 %1, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv1i8_nxv1i8_i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv1i8_nxv1i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv1i8.i8( + %0, + i8 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv2i8_nxv2i8_i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv2i8_nxv2i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv2i8.i8( + %0, + i8 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv4i8_nxv4i8_i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv4i8_nxv4i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,mf2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv4i8.i8( + %0, + i8 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv8i8_nxv8i8_i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv8i8_nxv8i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m1 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv8i8.i8( + %0, + i8 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv16i8_nxv16i8_i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv16i8_nxv16i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv16i8.i8( + %0, + i8 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv32i8_nxv32i8_i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv32i8_nxv32i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv32i8.i8( + %0, + i8 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv64i8_nxv64i8_i8( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv64i8_nxv64i8_i8 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8 +; CHECK: 
vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv64i8.i8( + %0, + i8 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv1i16_nxv1i16_i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv1i16_nxv1i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv1i16.i16( + %0, + i16 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv2i16_nxv2i16_i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv2i16_nxv2i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,mf2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv2i16.i16( + %0, + i16 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv4i16_nxv4i16_i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv4i16_nxv4i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m1 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv4i16.i16( + %0, + i16 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv8i16_nxv8i16_i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv8i16_nxv8i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv8i16.i16( + %0, + i16 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv16i16_nxv16i16_i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv16i16_nxv16i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv16i16.i16( + %0, + i16 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv32i16_nxv32i16_i16( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv32i16_nxv32i16_i16 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv32i16.i16( + %0, + i16 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv1i32_nxv1i32_i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv1i32_nxv1i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,mf2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv1i32.i32( + %0, + i32 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv2i32_nxv2i32_i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv2i32_nxv2i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m1 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv2i32.i32( + %0, + i32 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv4i32_nxv4i32_i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv4i32_nxv4i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv4i32.i32( + %0, + i32 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv8i32_nxv8i32_i32( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv8i32_nxv8i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv8i32.i32( + %0, + i32 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv16i32_nxv16i32_i32( %0, %1, i64 %2) nounwind { 
+entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv16i32_nxv16i32_i32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv16i32.i32( + %0, + i32 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv1i64_nxv1i64_i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv1i64_nxv1i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m1 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv1i64.i64( + %0, + i64 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv2i64_nxv2i64_i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv2i64_nxv2i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m2 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv2i64.i64( + %0, + i64 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv4i64_nxv4i64_i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv4i64_nxv4i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m4 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv4i64.i64( + %0, + i64 9, + %1, + i64 %2) + + ret %a +} + +define @intrinsic_vmerge_vim_nxv8i64_nxv8i64_i64( %0, %1, i64 %2) nounwind { +entry: +; CHECK-LABEL: intrinsic_vmerge_vim_nxv8i64_nxv8i64_i64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e64,m8 +; CHECK: vmerge.vim {{v[0-9]+}}, {{v[0-9]+}}, 9, v0 + %a = call @llvm.riscv.vmerge.nxv8i64.i64( + %0, + i64 9, + %1, + i64 %2) + + ret %a +} From bdef1f87aba656a64b34f76d2a6613b6e9299a03 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 22 Dec 2020 15:21:26 +0300 Subject: [PATCH 158/378] [llvm-readobj] - Dump the ELF file type better. Currently llvm-readelf might print "OS Specific/Processor Specific/" hint when dumping the ELF file type. The patch teaches llvm-readobj to do the same. This fixes https://bugs.llvm.org/show_bug.cgi?id=40868 I am removing `Object/elf-unknown-type.test` test because it is not in the right place, it is outdated and very limited. The `readobj/ELF/file-types.test` checks the functionality much better. 
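For reference, the range check this change relies on is easy to state in isolation. Below is a minimal standalone C++ sketch of the classification only; the `classifyElfType` helper and the `main` driver are invented for illustration and are not part of ELFDumper.cpp, while the ET_LOOS/ET_LOPROC constants are the standard ELF gABI values.

  #include <cstdint>
  #include <cstdio>
  #include <initializer_list>

  // Standard ELF e_type range boundaries (ELF gABI):
  //   [ET_LOOS, ET_HIOS]     = [0xfe00, 0xfeff]  OS-specific
  //   [ET_LOPROC, ET_HIPROC] = [0xff00, 0xffff]  processor-specific
  constexpr uint16_t ET_LOOS = 0xfe00;
  constexpr uint16_t ET_LOPROC = 0xff00;

  // Illustrative helper: the hint used for e_type values that have no
  // entry in the table of known ET_* values.
  static const char *classifyElfType(uint16_t Type) {
    if (Type >= ET_LOPROC)
      return "Processor Specific";
    if (Type >= ET_LOOS)
      return "OS Specific";
    return "Unknown";
  }

  int main() {
    // 0xfdff sits below ET_LOOS, so it gets no range hint; 0xfe00 and
    // 0xff00 land in the OS- and processor-specific ranges. This matches
    // the LLVM-style output checked in file-types.test, e.g.
    // "Unknown (0xFDFF)".
    for (uint16_t T : {uint16_t(0xfdff), uint16_t(0xfe00), uint16_t(0xff00)})
      std::printf("%s (0x%X)\n", classifyElfType(T), T);
    return 0;
  }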
Differential revision: https://reviews.llvm.org/D93689 --- llvm/test/Object/elf-unknown-type.test | 10 ----- .../tools/llvm-readobj/ELF/file-types.test | 10 ++--- llvm/tools/llvm-readobj/ELFDumper.cpp | 37 ++++++++++++++----- 3 files changed, 33 insertions(+), 24 deletions(-) delete mode 100644 llvm/test/Object/elf-unknown-type.test diff --git a/llvm/test/Object/elf-unknown-type.test b/llvm/test/Object/elf-unknown-type.test deleted file mode 100644 index 508e831ae90e0..0000000000000 --- a/llvm/test/Object/elf-unknown-type.test +++ /dev/null @@ -1,10 +0,0 @@ -# RUN: yaml2obj %s | llvm-readobj --file-headers - | FileCheck %s - -!ELF -FileHeader: !FileHeader - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 42 - Machine: EM_X86_64 - -# CHECK: Type: 0x2A diff --git a/llvm/test/tools/llvm-readobj/ELF/file-types.test b/llvm/test/tools/llvm-readobj/ELF/file-types.test index 767ce4d646fff..f06f302b86423 100644 --- a/llvm/test/tools/llvm-readobj/ELF/file-types.test +++ b/llvm/test/tools/llvm-readobj/ELF/file-types.test @@ -62,7 +62,7 @@ FileHeader: # RUN: llvm-readelf -h %t.unknown | FileCheck %s --match-full-lines --check-prefix GNU-UNNKNOWN # LLVM-UNNKNOWN: ElfHeader { -# LLVM-UNNKNOWN: Type: 0xFDFF +# LLVM-UNNKNOWN: Type: Unknown (0xFDFF) # GNU-UNNKNOWN: ELF Header: # GNU-UNNKNOWN: Type: : fdff @@ -72,7 +72,7 @@ FileHeader: # RUN: llvm-readelf -h %t6 | FileCheck %s --match-full-lines --check-prefix GNU-LOOS # LLVM-LOOS: ElfHeader { -# LLVM-LOOS: Type: 0xFE00 +# LLVM-LOOS: Type: OS Specific (0xFE00) # GNU-LOOS: ELF Header: # GNU-LOOS: Type: OS Specific: (fe00) @@ -82,7 +82,7 @@ FileHeader: # RUN: llvm-readelf -h %t7 | FileCheck %s --match-full-lines --check-prefix GNU-HIOS # LLVM-HIOS: ElfHeader { -# LLVM-HIOS: Type: 0xFEFF +# LLVM-HIOS: Type: OS Specific (0xFEFF) # GNU-HIOS: ELF Header: # GNU-HIOS: Type: OS Specific: (feff) @@ -92,7 +92,7 @@ FileHeader: # RUN: llvm-readelf -h %t8 | FileCheck %s --match-full-lines --check-prefix GNU-LOPROC # LLVM-LOPROC: ElfHeader { -# LLVM-LOPROC: Type: 0xFF00 +# LLVM-LOPROC: Type: Processor Specific (0xFF00) # GNU-LOPROC: ELF Header: # GNU-LOPROC: Type: Processor Specific: (ff00) @@ -102,7 +102,7 @@ FileHeader: # RUN: llvm-readelf -h %t9 | FileCheck %s --match-full-lines --check-prefix GNU-HIPROC # LLVM-HIPROC: ElfHeader { -# LLVM-HIPROC: Type: 0xFFFF +# LLVM-HIPROC: Type: Processor Specific (0xFFFF) # GNU-HIPROC: ELF Header: # GNU-HIPROC: Type: Processor Specific: (ffff) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index a82494ad1b4d8..00f8c3fcefacd 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3515,6 +3515,15 @@ static std::string getSectionHeaderTableIndexString(const ELFFile &Obj, to_string((*ArrOrErr)[0].sh_link) + ")"; } +static const EnumEntry *getObjectFileEnumEntry(unsigned Type) { + auto It = llvm::find_if(ElfObjectFileType, [&](const EnumEntry &E) { + return E.Value == Type; + }); + if (It != makeArrayRef(ElfObjectFileType).end()) + return It; + return nullptr; +} + template void GNUStyle::printFileHeaders() { const Elf_Ehdr &e = this->Obj.getHeader(); OS << "ELF Header:\n"; @@ -3539,17 +3548,15 @@ template void GNUStyle::printFileHeaders() { printFields(OS, "ABI Version:", std::to_string(e.e_ident[ELF::EI_ABIVERSION])); - Str = printEnum(e.e_type, makeArrayRef(ElfObjectFileType)); - if (makeArrayRef(ElfObjectFileType).end() == - llvm::find_if(ElfObjectFileType, [&](const EnumEntry &E) { - return E.Value == e.e_type; - })) { + if (const EnumEntry *E = 
getObjectFileEnumEntry(e.e_type)) { + Str = E->AltName.str(); + } else { if (e.e_type >= ET_LOPROC) - Str = "Processor Specific: (" + Str + ")"; + Str = "Processor Specific: (" + to_hexString(e.e_type, false) + ")"; else if (e.e_type >= ET_LOOS) - Str = "OS Specific: (" + Str + ")"; + Str = "OS Specific: (" + to_hexString(e.e_type, false) + ")"; else - Str = ": " + Str; + Str = ": " + to_hexString(e.e_type, false); } printFields(OS, "Type:", Str); @@ -6343,7 +6350,19 @@ template void LLVMStyle::printFileHeaders() { W.printBinary("Unused", makeArrayRef(E.e_ident).slice(ELF::EI_PAD)); } - W.printEnum("Type", E.e_type, makeArrayRef(ElfObjectFileType)); + std::string TypeStr; + if (const EnumEntry *Ent = getObjectFileEnumEntry(E.e_type)) { + TypeStr = Ent->Name.str(); + } else { + if (E.e_type >= ET_LOPROC) + TypeStr = "Processor Specific"; + else if (E.e_type >= ET_LOOS) + TypeStr = "OS Specific"; + else + TypeStr = "Unknown"; + } + W.printString("Type", TypeStr + " (0x" + to_hexString(E.e_type) + ")"); + W.printEnum("Machine", E.e_machine, makeArrayRef(ElfMachineType)); W.printNumber("Version", E.e_version); W.printHex("Entry", E.e_entry); From 6301871d06d6ea0195b5ea3b53288dcfe229557a Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 22 Dec 2020 05:30:24 -0800 Subject: [PATCH 159/378] [RISCV] Add intrinsics for vfwmacc, vfwnmacc, vfwmsac, vfwnmsac instructions This patch defines vfwmacc, vfwnmacc, vfwmsc, vfwnmsac intrinsics and lower to V instructions. We work with @rogfer01 from BSC to come out this patch. Authored-by: Roger Ferrer Ibanez Co-Authored-by: ShihPo Hung Differential Revision: https://reviews.llvm.org/D93693 --- llvm/include/llvm/IR/IntrinsicsRISCV.td | 5 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 36 +- llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll | 482 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll | 868 ++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll | 482 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll | 868 ++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll | 482 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll | 868 ++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll | 482 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll | 868 ++++++++++++++++++ 10 files changed, 5432 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 0dcc9e5b1d6c9..13f883713cd37 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -561,6 +561,11 @@ let TargetPrefix = "riscv" in { defm vfmsub : RISCVTernaryAAXA; defm vfnmsub : RISCVTernaryAAXA; + defm vfwmacc : RISCVTernaryWide; + defm vfwnmacc : RISCVTernaryWide; + defm vfwmsac : RISCVTernaryWide; + defm vfwnmsac : RISCVTernaryWide; + defm vfmin : RISCVBinaryAAX; defm vfmax : RISCVBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 1540ea403b926..50142b0373ee3 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -898,10 +898,11 @@ multiclass VPseudoTernaryW_VV { defm _VV : VPseudoTernary; } -multiclass VPseudoTernaryW_VX { +multiclass VPseudoTernaryW_VX { defvar constraint = "@earlyclobber $rd"; foreach m = MxList.m in - defm _VX : VPseudoTernary; + defm !if(IsFloat, "_VF", "_VX") : VPseudoTernary; } multiclass VPseudoTernaryV_VI { @@ -919,9 +920,9 @@ multiclass VPseudoTernaryV_VX_VI; } -multiclass VPseudoTernaryW_VV_VX { +multiclass VPseudoTernaryW_VV_VX { defm "" : VPseudoTernaryW_VV; - defm "" : VPseudoTernaryW_VX; + defm "" : VPseudoTernaryW_VX; } multiclass VPseudoBinaryM_VV_VX_VI { @@ -1631,10 +1632,11 @@ multiclass VPatTernaryW_VX; + defm : VPatTernary; } } @@ -1864,7 +1866,7 @@ defm PseudoVNMSUB : VPseudoTernaryV_VV_VX_AAXA; defm PseudoVWMACCU : VPseudoTernaryW_VV_VX; defm PseudoVWMACC : VPseudoTernaryW_VV_VX; defm PseudoVWMACCSU : VPseudoTernaryW_VV_VX; -defm PseudoVWMACCUS : VPseudoTernaryW_VX; +defm PseudoVWMACCUS : VPseudoTernaryW_VX; //===----------------------------------------------------------------------===// // 12.15. Vector Integer Merge Instructions @@ -1962,6 +1964,14 @@ defm PseudoVFNMADD : VPseudoTernaryV_VV_VX_AAXA; defm PseudoVFMSUB : VPseudoTernaryV_VV_VX_AAXA; defm PseudoVFNMSUB : VPseudoTernaryV_VV_VX_AAXA; +//===----------------------------------------------------------------------===// +// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFWMACC : VPseudoTernaryW_VV_VX; +defm PseudoVFWNMACC : VPseudoTernaryW_VV_VX; +defm PseudoVFWMSAC : VPseudoTernaryW_VV_VX; +defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VX; + //===----------------------------------------------------------------------===// // 14.9. Vector Floating-Point Min/Max Instructions //===----------------------------------------------------------------------===// @@ -2373,6 +2383,14 @@ defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloat defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>; defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>; +//===----------------------------------------------------------------------===// +// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmacc", "PseudoVFWMACC", AllWidenableFloatVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmacc", "PseudoVFWNMACC", AllWidenableFloatVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", AllWidenableFloatVectors>; + //===----------------------------------------------------------------------===// // 14.9. 
Vector Floating-Point Min/Max Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll new file mode 100644 index 0000000000000..6825ecb121b34 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv2f32.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv2f32.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv4f32.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv4f32.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv8f32.nxv8f16( + , + , + , + i32); + +define 
@intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv8f32.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv16f32.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv16f32.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv1f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv1f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv1f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv2f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv2f32.f16( + 
%0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv2f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv4f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv4f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv4f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv8f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv8f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv8f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv16f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv16f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv16f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; 
CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll new file mode 100644 index 0000000000000..ccd970ef6af33 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll @@ -0,0 +1,868 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv2f32.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv2f32.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv4f32.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv4f32.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv8f32.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv8f32.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv16f32.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv16f32.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv1f64.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv1f64.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv2f64.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v18, v19 +; 
CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv2f64.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv4f64.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv4f64.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv8f64.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv8f64.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv1f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv1f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv1f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x 
ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv2f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv2f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv2f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv4f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv4f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv4f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv8f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv8f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv8f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv16f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv16f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv16f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv1f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv1f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv1f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv1f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv2f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv2f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv2f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv2f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv4f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv4f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv4f64.f32( + , + float, + , + 
, + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv4f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmacc.nxv8f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.nxv8f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmacc.mask.nxv8f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmacc.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmacc.mask.nxv8f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll new file mode 100644 index 0000000000000..27b22f94f12ae --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv2f32.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv2f32.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare 
@llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv4f32.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv4f32.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv8f32.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv8f32.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv16f32.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv16f32.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv1f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv1f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv1f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv2f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv2f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv2f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv4f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv4f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv4f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv8f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: 
+ %a = call @llvm.riscv.vfwmsac.nxv8f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv8f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv16f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv16f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv16f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll new file mode 100644 index 0000000000000..b5149780a79ef --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll @@ -0,0 +1,868 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv2f32.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vv 
v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv2f32.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv4f32.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv4f32.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv8f32.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv8f32.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv16f32.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv16f32.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: 
vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv1f64.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv1f64.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv2f64.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv2f64.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv4f64.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv4f64.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv8f64.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v8, v28 +; 
CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv8f64.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmsac.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv1f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv1f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv1f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv2f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv2f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv2f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv4f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv4f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv4f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv8f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv8f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv8f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv16f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv16f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv16f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv1f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv1f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv1f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv1f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv2f64.f32( + , + float, + , + 
i64); + +define @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv2f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv2f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv2f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv4f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv4f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv4f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv4f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwmsac.nxv8f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.nxv8f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwmsac.mask.nxv8f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwmsac.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwmsac.mask.nxv8f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll new file mode 100644 index 0000000000000..1b00a96f93d27 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 
-mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16( + 
, + , + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv1f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv1f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv2f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv2f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: 
vfwnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv4f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv4f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv8f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv8f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv16f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv16f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll new file mode 
100644 index 0000000000000..66d284f1067d6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll @@ -0,0 +1,868 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, 
e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv1f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv1f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv2f32.f16( 
+ , + half, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv2f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv4f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv4f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv8f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv8f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv16f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfwnmacc.nxv16f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv1f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv1f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv2f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv2f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv4f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv4f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.nxv8f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.nxv8f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmacc.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll new file mode 100644 index 0000000000000..40bd6c0688da6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +; 
CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16( + , + , + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv1f32.f16( + , + half, + , + i32); + 
+define @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv1f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv2f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv2f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv4f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv4f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv8f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv8f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare 
@llvm.riscv.vfwnmsac.mask.nxv8f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv16f32.f16( + , + half, + , + i32); + +define @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv16f32.f16( + %0, + half %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( + , + half, + , + , + i32); + +define @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll new file mode 100644 index 0000000000000..95bcc7cbec905 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll @@ -0,0 +1,868 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
@llvm.riscv.vfwnmsac.nxv2f32.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v8, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; 
CHECK-NEXT: vfwnmsac.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v17, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v18, v19 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v18, v19, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v20, v22 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v20, v22, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v8, v28 +; 
CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vv v16, v8, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv1f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv1f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv2f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv2f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv4f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv4f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16: +; CHECK: 
# %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv8f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv8f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv16f32.f16( + , + half, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv16f32.f16( + %0, + half %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( + , + half, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e16,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( + %0, + half %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv1f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv1f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,mf2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v17, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + 
+declare @llvm.riscv.vfwnmsac.nxv2f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v18 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv2f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m1,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v18, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv4f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v20 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv4f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v20, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.nxv8f64.f32( + , + float, + , + i64); + +define @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v28 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.nxv8f64.f32( + %0, + float %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( + , + float, + , + , + i64); + +define @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: vsetvli a0, a2, e32,m4,ta,mu +; CHECK-NEXT: vfwnmsac.vf v16, ft0, v28, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( + %0, + float %1, + %2, + %3, + i64 %4) + + ret %a +} From 221fdedc692672d4f63ee768ae5c541626734240 Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Tue, 22 Dec 2020 13:53:38 +0100 Subject: [PATCH 160/378] [AMDGPU][GlobalISel] Fold flat vgpr + constant addresses Use getPtrBaseWithConstantOffset in selectFlatOffsetImpl to fold more 
vgpr+constant addresses. Differential Revision: https://reviews.llvm.org/D93692 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 18 +- .../GlobalISel/extractelement-stack-lower.ll | 1675 +++++++++-------- .../AMDGPU/GlobalISel/extractelement.ll | 16 +- .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 64 +- .../GlobalISel/llvm.amdgcn.atomic.inc.ll | 260 ++- .../llvm.amdgcn.global.atomic.fadd.ll | 30 +- .../AMDGPU/GlobalISel/load-constant.96.ll | 89 +- 7 files changed, 1177 insertions(+), 975 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index ac6ddbae350b7..bfac1b412051e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3427,22 +3427,18 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { if (!STI.hasFlatInstOffsets()) return Default; - const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg()); - if (!OpDef || OpDef->getOpcode() != AMDGPU::G_PTR_ADD) - return Default; - - Optional Offset = - getConstantVRegSExtVal(OpDef->getOperand(2).getReg(), *MRI); - if (!Offset.hasValue()) + Register PtrBase; + int64_t ConstOffset; + std::tie(PtrBase, ConstOffset) = + getPtrBaseWithConstantOffset(Root.getReg(), *MRI); + if (ConstOffset == 0) return Default; unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace(); - if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed)) + if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, Signed)) return Default; - Register BasePtr = OpDef->getOperand(1).getReg(); - - return std::make_pair(BasePtr, Offset.getValue()); + return std::make_pair(PtrBase, ConstOffset); } InstructionSelector::ComplexRendererFns diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 6fd99b8406d75..f0eefbd44bfb4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -8,279 +8,316 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v64i32_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v15, v0 ; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0 ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; 
GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc +; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 +; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: s_movk_i32 s4, 0x80 -; GCN-NEXT: v_mov_b32_e32 v12, s5 -; GCN-NEXT: v_mov_b32_e32 v16, v1 -; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15 -; GCN-NEXT: v_mov_b32_e32 v11, s4 -; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc -; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11 -; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc +; GCN-NEXT: v_mov_b32_e32 v17, s5 +; GCN-NEXT: v_mov_b32_e32 v16, s4 ; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: v_mov_b32_e32 v12, s5 -; GCN-NEXT: v_mov_b32_e32 v11, s4 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11 -; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off -; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16 -; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc -; GCN-NEXT: global_load_dwordx4 v[11:14], v[15:16], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[15:18], v[15:16], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off -; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off -; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 -; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 -; GCN-NEXT: v_add_u32_e32 v1, 16, v0 ; GCN-NEXT: s_add_u32 s32, s32, 0x10000 ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte 
Folded Spill -; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48 -; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 20, v0 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 24, v0 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 28, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 32, v0 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 36, v0 -; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 40, v0 -; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 44, v0 -; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 
offen -; GCN-NEXT: v_add_u32_e32 v1, 48, v0 -; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 52, v0 -; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 56, v0 -; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 60, v0 -; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 64, v0 -; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 -; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 -; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 -; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 -; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 -; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 -; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 -; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 -; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 -; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 -; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 -; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 -; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 -; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 -; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 -; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 -; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 -; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 -; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 -; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 -; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 -; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 -; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 -; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 
offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v15 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v9, v16 -; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v10, v17 -; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v18 -; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 -; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 -; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 -; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 -; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 4, v0 -; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 8, v0 -; GCN-NEXT: 
buffer_store_dword v5, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 12, v0 -; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 -; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0 -; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v0 -; GCN-NEXT: buffer_store_dword v52, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v0 -; GCN-NEXT: buffer_store_dword v53, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v0 -; GCN-NEXT: buffer_store_dword v54, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0 -; GCN-NEXT: buffer_store_dword v55, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v0 -; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v0 -; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xec, v0 -; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v0 -; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v0 -; GCN-NEXT: buffer_store_dword v60, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v0 -; GCN-NEXT: buffer_store_dword v61, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v0 -; GCN-NEXT: buffer_store_dword v62, v1, s[0:3], 0 offen -; GCN-NEXT: v_and_b32_e32 v1, 63, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 +; GCN-NEXT: v_add_u32_e32 v3, 0x100, v3 +; GCN-NEXT: v_add_u32_e32 v60, 16, v3 +; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v16 +; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v17, vcc +; GCN-NEXT: v_mov_b32_e32 v17, s5 +; GCN-NEXT: v_mov_b32_e32 v16, s4 +; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v16 +; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v17, vcc +; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off +; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 
+; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 +; GCN-NEXT: global_load_dwordx4 v[44:47], v[52:53], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[52:53], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:16 +; GCN-NEXT: v_add_u32_e32 v0, 20, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[56:59], v[56:57], off offset:48 +; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 24, v3 +; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 
0 offen +; GCN-NEXT: v_add_u32_e32 v0, 28, v3 +; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 32, v3 +; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 36, v3 +; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 40, v3 +; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 44, v3 +; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 48, v3 +; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 52, v3 +; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 56, v3 +; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 60, v3 +; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 64, v3 +; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x44, v3 +; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x48, v3 +; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v3 +; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x50, v3 +; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x54, v3 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x58, v3 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v3 +; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x60, v3 +; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x64, v3 +; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x68, v3 +; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v3 +; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v35, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0x70, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: 
v_mov_b32_e32 v12, v32 +; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v13, v33 +; GCN-NEXT: v_add_u32_e32 v0, 0x74, v3 +; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v14, v34 +; GCN-NEXT: v_add_u32_e32 v0, 0x78, v3 +; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v15, v35 +; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v3 +; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x80, v3 +; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x84, v3 +; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x88, v3 +; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v3 +; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x90, v3 +; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x94, v3 +; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x98, v3 +; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v3 +; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v3 +; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v3 +; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v3 +; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xac, v3 +; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v3 +; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v3 +; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v3 +; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v3 +; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v3 +; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v3 +; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v3 +; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v3 +; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 4, v3 +; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 8, v3 +; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 12, v3 +; GCN-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; 
GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v4, v8 +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v5, v9 +; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v3 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v6, v10 +; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v3 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v7, v11 +; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v3 +; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v12 +; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v9, v13 +; GCN-NEXT: v_add_u32_e32 v0, 0xe4, v3 +; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v10, v14 +; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v3 +; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v15 +; GCN-NEXT: v_add_u32_e32 v0, 0xec, v3 +; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v3 +; GCN-NEXT: buffer_store_dword v56, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v3 +; GCN-NEXT: buffer_store_dword v57, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v3 +; GCN-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v3 +; GCN-NEXT: buffer_store_dword v59, v0, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v0, 63, v2 +; GCN-NEXT: 
v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_add_u32_e32 v0, v3, v0 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s33, s6 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -293,284 +330,321 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v128i16_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v15, v0 ; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0 ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; 
GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc +; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 +; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: s_movk_i32 s4, 0x80 -; GCN-NEXT: v_mov_b32_e32 v12, s5 -; GCN-NEXT: v_mov_b32_e32 v16, v1 -; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15 -; GCN-NEXT: v_mov_b32_e32 v11, s4 -; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc -; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11 -; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc +; GCN-NEXT: v_mov_b32_e32 v17, s5 +; GCN-NEXT: v_mov_b32_e32 v16, s4 ; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: v_mov_b32_e32 v12, s5 -; GCN-NEXT: v_mov_b32_e32 v11, s4 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11 -; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off -; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16 -; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc -; GCN-NEXT: global_load_dwordx4 v[11:14], v[15:16], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[15:18], v[15:16], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off -; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off -; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 -; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 -; 
GCN-NEXT: v_add_u32_e32 v1, 16, v0 ; GCN-NEXT: s_add_u32 s32, s32, 0x10000 ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48 -; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 20, v0 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 24, v0 -; GCN-NEXT: 
buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 28, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 32, v0 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 36, v0 -; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 40, v0 -; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 44, v0 -; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 48, v0 -; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 52, v0 -; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 56, v0 -; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 60, v0 -; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 64, v0 -; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 -; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 -; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 -; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 -; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 -; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 -; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 -; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 -; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 -; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 -; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 -; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 -; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 -; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 -; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 -; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 -; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 -; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 -; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 -; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 -; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 -; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 -; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 -; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte 
Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v15 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v9, v16 -; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v10, v17 -; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v18 -; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 -; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 -; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 -; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 -; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) 
+; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 +; GCN-NEXT: v_add_u32_e32 v3, 0x100, v3 +; GCN-NEXT: v_add_u32_e32 v60, 16, v3 +; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v16 +; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v17, vcc +; GCN-NEXT: v_mov_b32_e32 v17, s5 +; GCN-NEXT: v_mov_b32_e32 v16, s4 +; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v16 +; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v17, vcc +; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off +; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 +; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 +; GCN-NEXT: global_load_dwordx4 v[44:47], v[52:53], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[52:53], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:16 +; GCN-NEXT: v_add_u32_e32 v0, 20, v3 +; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], 
s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill ; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[56:59], v[56:57], off offset:48 +; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 24, v3 +; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 28, v3 +; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 32, v3 +; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 36, v3 +; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 40, v3 +; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 44, v3 +; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 48, v3 +; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 52, v3 +; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 56, v3 +; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 60, v3 +; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 64, v3 +; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x44, v3 +; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x48, v3 +; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v3 +; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x50, v3 +; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x54, v3 
+; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x58, v3 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v3 +; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x60, v3 +; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x64, v3 +; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x68, v3 +; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v3 +; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v35, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0x70, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v32 +; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v13, v33 +; GCN-NEXT: v_add_u32_e32 v0, 0x74, v3 +; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v14, v34 +; GCN-NEXT: v_add_u32_e32 v0, 0x78, v3 +; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v15, v35 +; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v3 +; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x80, v3 +; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x84, v3 +; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x88, v3 +; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v3 +; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x90, v3 +; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x94, v3 +; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x98, v3 +; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v3 +; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v3 +; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v3 +; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 
offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v3 +; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xac, v3 +; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v3 +; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v3 +; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v3 +; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v3 +; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v3 +; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v3 +; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v3 +; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v3 +; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 4, v3 +; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 8, v3 +; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 12, v3 +; GCN-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v2 +; GCN-NEXT: v_and_b32_e32 v0, 63, v0 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_add_u32_e32 v0, v3, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v4, v8 +; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v5, v9 +; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v3 +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v6, v10 +; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v3 +; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v7, v11 +; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v3 ; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], 
s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v12 ; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 +; GCN-NEXT: v_mov_b32_e32 v9, v13 +; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v3 ; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 +; GCN-NEXT: v_mov_b32_e32 v10, v14 +; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v3 ; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 4, v0 -; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 8, v0 -; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 12, v0 -; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 -; GCN-NEXT: v_add_u32_e32 v3, 0xd0, v0 -; GCN-NEXT: buffer_store_dword v51, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xd4, v0 -; GCN-NEXT: buffer_store_dword v52, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xd8, v0 -; GCN-NEXT: buffer_store_dword v53, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xdc, v0 -; GCN-NEXT: buffer_store_dword v54, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xe0, v0 -; GCN-NEXT: buffer_store_dword v55, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xe4, v0 -; GCN-NEXT: buffer_store_dword v56, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xe8, v0 -; GCN-NEXT: buffer_store_dword v57, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xec, v0 -; GCN-NEXT: buffer_store_dword v58, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xf0, v0 -; GCN-NEXT: v_lshrrev_b32_e32 v1, 1, v2 -; GCN-NEXT: buffer_store_dword v59, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xf4, v0 -; GCN-NEXT: v_and_b32_e32 v1, 63, v1 -; GCN-NEXT: buffer_store_dword v60, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xf8, v0 -; GCN-NEXT: buffer_store_dword v61, v3, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v3, 0xfc, v0 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GCN-NEXT: v_add_u32_e32 v0, v0, v1 -; GCN-NEXT: buffer_store_dword v62, v3, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v15 +; GCN-NEXT: v_add_u32_e32 v1, 0xec, v3 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen 
+; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v3 +; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v3 +; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v3 +; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v3 +; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GCN-NEXT: v_and_b32_e32 v1, 1, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GCN-NEXT: s_mov_b32 s33, s6 -; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: s_waitcnt vmcnt(13) ; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -583,22 +657,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v32i64_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v15, v0 ; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 -; GCN-NEXT: s_movk_i32 
s4, 0x80 -; GCN-NEXT: v_mov_b32_e32 v12, s5 -; GCN-NEXT: v_mov_b32_e32 v16, v1 -; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v15 -; GCN-NEXT: v_mov_b32_e32 v11, s4 -; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v16, vcc -; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v15, v11 -; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v16, v12, vcc -; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: v_mov_b32_e32 v12, s5 -; GCN-NEXT: v_mov_b32_e32 v11, s4 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill @@ -614,232 +676,323 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v15, v11 -; GCN-NEXT: global_load_dwordx4 v[3:6], v[15:16], off -; GCN-NEXT: global_load_dwordx4 v[7:10], v[15:16], off offset:16 -; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v16, v12, vcc -; GCN-NEXT: global_load_dwordx4 v[11:14], v[15:16], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[15:18], v[15:16], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off -; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off -; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 -; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 -; GCN-NEXT: v_add_u32_e32 v1, 16, v0 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc +; GCN-NEXT: global_load_dwordx4 v[7:10], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[11:14], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[56:59], v[3:4], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[0:1], off +; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, v0, v3 +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v5, s4 +; GCN-NEXT: v_add_co_u32_e32 v60, vcc, v0, v5 +; GCN-NEXT: v_addc_co_u32_e32 v61, vcc, v1, v6, vcc +; GCN-NEXT: v_lshrrev_b32_e64 v62, 6, s33 +; GCN-NEXT: v_add_u32_e32 v62, 0x100, v62 +; GCN-NEXT: v_add_u32_e32 v2, 16, v62 +; GCN-NEXT: s_add_u32 s32, s32, 0x14000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x14000 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill 
-; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48 -; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 20, v0 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 24, v0 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 28, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 32, v0 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 36, v0 -; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 40, v0 -; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 44, v0 -; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 48, v0 -; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen -; 
GCN-NEXT: v_add_u32_e32 v1, 52, v0 -; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 56, v0 -; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 60, v0 -; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 64, v0 -; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 -; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 -; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 -; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 -; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 -; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 -; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 -; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 -; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 -; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 -; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 -; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 -; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 -; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 -; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 -; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 -; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 -; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 -; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 -; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 -; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 -; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 -; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 -; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded 
Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v15 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v9, v16 -; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v10, v17 -; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v18 -; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 -; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 -; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 -; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 -; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 4, v0 -; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 8, v0 -; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 12, v0 -; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 
0 offen -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 -; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0 -; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v0 -; GCN-NEXT: buffer_store_dword v52, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v0 -; GCN-NEXT: buffer_store_dword v53, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v0 -; GCN-NEXT: buffer_store_dword v54, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0 -; GCN-NEXT: buffer_store_dword v55, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v0 -; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v0 -; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xec, v0 -; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v0 -; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v0 -; GCN-NEXT: buffer_store_dword v60, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v0 -; GCN-NEXT: buffer_store_dword v61, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v0 -; GCN-NEXT: buffer_store_dword v62, v1, s[0:3], 0 offen -; GCN-NEXT: v_and_b32_e32 v1, 31, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:704 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 +; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 +; GCN-NEXT: global_load_dwordx4 v[44:47], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[48:51], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[3:4], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:16 +; GCN-NEXT: v_add_u32_e32 v0, 20, v62 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:708 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], 
s33 offset:712 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:716 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:720 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:724 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:728 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:732 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:736 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:740 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:744 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:748 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:752 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:756 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:760 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:764 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:768 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:48 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, 
s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v20, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 24, v62 +; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 28, v62 +; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 32, v62 +; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 36, v62 +; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 40, v62 +; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 44, v62 +; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 48, v62 +; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 52, v62 +; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 56, v62 +; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 60, v62 +; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 64, v62 +; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x44, v62 +; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x48, v62 +; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v62 +; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x50, v62 +; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x54, v62 +; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x58, v62 +; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v62 +; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x60, v62 +; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x64, v62 +; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x68, v62 +; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v62 +; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x70, v62 +; GCN-NEXT: buffer_store_dword v56, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x74, v62 +; GCN-NEXT: buffer_store_dword v57, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x78, v62 +; GCN-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v62 +; GCN-NEXT: buffer_store_dword v59, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x80, v62 +; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x84, v62 +; GCN-NEXT: buffer_store_dword v37, 
v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x88, v62 +; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v62 +; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x90, v62 +; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x94, v62 +; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x98, v62 +; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v62 +; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v62 +; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v62 +; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v62 +; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xac, v62 +; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v62 +; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v62 +; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v62 +; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v62 +; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v62 +; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v62 +; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v62 +; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v62 +; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:704 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 4, v62 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 8, v62 +; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 12, v62 +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:256 +; 
GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:708 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:712 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:716 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:720 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:724 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:728 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:732 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:736 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:740 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:744 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:748 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:752 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:756 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:760 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:764 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:768 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v62 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v4, v5 +; GCN-NEXT: v_mov_b32_e32 v5, v6 +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v62 +; GCN-NEXT: v_mov_b32_e32 v6, v7 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v62 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v7, v8 +; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v62 +; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v62 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v9 +; GCN-NEXT: v_mov_b32_e32 v9, v10 +; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xe4, 
v62 +; GCN-NEXT: v_mov_b32_e32 v10, v11 +; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v62 +; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v12 +; GCN-NEXT: v_add_u32_e32 v0, 0xec, v62 +; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v62 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v13 +; GCN-NEXT: v_mov_b32_e32 v13, v14 +; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v62 +; GCN-NEXT: v_mov_b32_e32 v14, v15 +; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v62 +; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v15, v16 +; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v62 +; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, 31, v0 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GCN-NEXT: v_add_u32_e32 v0, v62, v0 ; GCN-NEXT: v_add_u32_e32 v1, 4, v0 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 02d9d3cfbb85b..5f04b3681dae4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -2520,13 +2520,7 @@ define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_32: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_movk_i32 s4, 0x80 -; GPRIDX-NEXT: s_mov_b32 s5, 0 -; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 -; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; 
GPRIDX-NEXT: s_setpc_b64 s[30:31] ; @@ -2551,13 +2545,7 @@ define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_33: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_movk_i32 s4, 0x80 -; GPRIDX-NEXT: s_mov_b32 s5, 0 -; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 -; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 2fe0c29e54de6..ff28280ba9b45 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -52,18 +52,17 @@ bb: define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX9-LABEL: store_load_vindex_kernel: ; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: v_mov_b32_e32 v2, 4 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: scratch_store_dword v1, v3, off -; GFX9-NEXT: v_add_u32_e32 v0, 0x7c, v0 -; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vindex_kernel: @@ -73,15 +72,14 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 -; GFX10-NEXT: v_mov_b32_e32 v2, 4 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 4 +; GFX10-NEXT: v_mov_b32_e32 v3, 15 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 -; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x7c, v1 -; GFX10-NEXT: scratch_store_dword v0, v2, off -; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 ; GFX10-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) @@ -147,9 +145,8 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) { ; GFX9-LABEL: private_ptr_foo: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_u32_e32 v0, 4, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x41200000 -; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: scratch_store_dword v0, v1, off offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -157,9 +154,8 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000 -; GFX10-NEXT: scratch_store_dword v0, v1, off +; GFX10-NEXT: scratch_store_dword v0, v1, off offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX10-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1 @@ -234,12 +230,11 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x104 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 ; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: scratch_store_dword v1, v3, off -; GFX9-NEXT: v_add_u32_e32 v0, 0x7c, v0 -; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vindex_small_offset_kernel: @@ -249,17 +244,16 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 -; GFX10-NEXT: v_mov_b32_e32 v2, 0x104 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0x104 +; GFX10-NEXT: v_mov_b32_e32 v3, 15 ; GFX10-NEXT: s_add_u32 s0, 4, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: scratch_load_dword v3, off, s0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 -; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x7c, v1 -; GFX10-NEXT: scratch_store_dword v0, v2, off -; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: scratch_load_dword v2, off, s0 +; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 ; GFX10-NEXT: s_endpgm bb: %padding = alloca [64 x i32], align 4, addrspace(5) @@ -401,12 +395,11 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x4004 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 ; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: scratch_store_dword v1, v3, off -; GFX9-NEXT: v_add_u32_e32 v0, 0x7c, v0 -; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: v_add_u32_e32 v0, v2, v0 +; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vindex_large_offset_kernel: @@ -416,17 +409,16 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 -; GFX10-NEXT: v_mov_b32_e32 v2, 0x4004 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0x4004 +; GFX10-NEXT: v_mov_b32_e32 v3, 15 ; GFX10-NEXT: s_add_u32 s0, 4, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: scratch_load_dword v3, off, s0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 -; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x7c, v1 -; GFX10-NEXT: scratch_store_dword v0, v2, off -; GFX10-NEXT: scratch_load_dword v0, v1, off +; GFX10-NEXT: scratch_load_dword v2, off, s0 +; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 ; GFX10-NEXT: s_endpgm bb: %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -593,9 +585,8 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_lshl_u32 v0, s0, v0, 2 ; GFX9-NEXT: 
v_add_u32_e32 v0, 4, v0 -; GFX9-NEXT: v_add_u32_e32 v0, 0x400, v0 -; GFX9-NEXT: scratch_store_dword v0, v1, off -; GFX9-NEXT: scratch_load_dword v0, v0, off +; GFX9-NEXT: scratch_store_dword v0, v1, off offset:1024 +; GFX9-NEXT: scratch_load_dword v0, v0, off offset:1024 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vidx_sidx_offset: @@ -609,9 +600,8 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_lshl_u32 v0, s0, v0, 2 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x400, v0 -; GFX10-NEXT: scratch_store_dword v0, v1, off -; GFX10-NEXT: scratch_load_dword v0, v0, off +; GFX10-NEXT: scratch_store_dword v0, v1, off offset:1024 +; GFX10-NEXT: scratch_load_dword v0, v0, off offset:1024 ; GFX10-NEXT: s_endpgm bb: %alloca = alloca [32 x i32], align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll index 5fc598b3dcbfb..d4fac6bcc8eb3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -999,21 +999,51 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { } define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 { -; GCN-LABEL: flat_atomic_inc_ret_i32_offset: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v2, 42 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_add_u32 s2, s2, 16 -; GCN-NEXT: s_addc_u32 s3, s3, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s2 -; GCN-NEXT: v_mov_b32_e32 v1, s3 -; GCN-NEXT: flat_atomic_inc v2, v[0:1], v2 glc -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: flat_store_dword v[0:1], v2 -; GCN-NEXT: s_endpgm +; CI-LABEL: flat_atomic_inc_ret_i32_offset: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CI-NEXT: v_mov_b32_e32 v2, 42 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_add_u32 s2, s2, 16 +; CI-NEXT: s_addc_u32 s3, s3, 0 +; CI-NEXT: v_mov_b32_e32 v0, s2 +; CI-NEXT: v_mov_b32_e32 v1, s3 +; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc +; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CI-NEXT: flat_store_dword v[0:1], v2 +; CI-NEXT: s_endpgm +; +; VI-LABEL: flat_atomic_inc_ret_i32_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; VI-NEXT: v_mov_b32_e32 v2, 42 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s2, s2, 16 +; VI-NEXT: s_addc_u32 s3, s3, 0 +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: flat_atomic_inc_ret_i32_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v2, 42 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_dword v[0:1], v2 +; GFX9-NEXT: s_endpgm %gep = getelementptr i32, i32* %ptr, i32 4 %result = call i32 
@llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) store i32 %result, i32* %out @@ -1035,17 +1065,39 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { } define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind { -; GCN-LABEL: flat_atomic_inc_noret_i32_offset: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v2, 42 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_add_u32 s0, s0, 16 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: flat_atomic_inc v0, v[0:1], v2 glc -; GCN-NEXT: s_endpgm +; CI-LABEL: flat_atomic_inc_noret_i32_offset: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CI-NEXT: v_mov_b32_e32 v2, 42 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_add_u32 s0, s0, 16 +; CI-NEXT: s_addc_u32 s1, s1, 0 +; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; CI-NEXT: s_endpgm +; +; VI-LABEL: flat_atomic_inc_noret_i32_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; VI-NEXT: v_mov_b32_e32 v2, 42 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s0, 16 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: flat_atomic_inc_noret_i32_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v2, 42 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 offset:16 glc +; GFX9-NEXT: s_endpgm %gep = getelementptr i32, i32* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) ret void @@ -1097,22 +1149,20 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* ; GFX9-LABEL: flat_atomic_inc_ret_i32_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 20, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_mov_b32_e32 v4, 42 -; GFX9-NEXT: flat_atomic_inc v2, v[2:3], v4 glc +; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v4 offset:20 glc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: flat_store_dword v[0:1], v2 +; GFX9-NEXT: flat_store_dword v[2:3], v0 ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, i32* %ptr, i32 %id @@ -1163,10 +1213,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: 
v_add_co_u32_e32 v0, vcc, 20, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v2, 42 -; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 offset:20 glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, i32* %ptr, i32 %id @@ -1257,22 +1305,54 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { } define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 { -; GCN-LABEL: flat_atomic_inc_ret_i64_offset: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v2, 42 -; GCN-NEXT: v_mov_b32_e32 v3, 0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_add_u32 s2, s2, 32 -; GCN-NEXT: s_addc_u32 s3, s3, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s2 -; GCN-NEXT: v_mov_b32_e32 v1, s3 -; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc -; GCN-NEXT: v_mov_b32_e32 v3, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GCN-NEXT: s_endpgm +; CI-LABEL: flat_atomic_inc_ret_i64_offset: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CI-NEXT: v_mov_b32_e32 v2, 42 +; CI-NEXT: v_mov_b32_e32 v3, 0 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_add_u32 s2, s2, 32 +; CI-NEXT: s_addc_u32 s3, s3, 0 +; CI-NEXT: v_mov_b32_e32 v0, s2 +; CI-NEXT: v_mov_b32_e32 v1, s3 +; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc +; CI-NEXT: v_mov_b32_e32 v3, s1 +; CI-NEXT: v_mov_b32_e32 v2, s0 +; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; CI-NEXT: s_endpgm +; +; VI-LABEL: flat_atomic_inc_ret_i64_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; VI-NEXT: v_mov_b32_e32 v2, 42 +; VI-NEXT: v_mov_b32_e32 v3, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s2, s2, 32 +; VI-NEXT: s_addc_u32 s3, s3, 0 +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: flat_atomic_inc_ret_i64_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 42 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX9-NEXT: s_endpgm %gep = getelementptr i64, i64* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) store i64 %result, i64* %out @@ -1295,18 +1375,42 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { } define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind { -; GCN-LABEL: flat_atomic_inc_noret_i64_offset: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v2, 42 -; GCN-NEXT: v_mov_b32_e32 v3, 0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_add_u32 s0, s0, 32 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: flat_atomic_inc_x2 v[0:1], 
v[0:1], v[2:3] glc -; GCN-NEXT: s_endpgm +; CI-LABEL: flat_atomic_inc_noret_i64_offset: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CI-NEXT: v_mov_b32_e32 v2, 42 +; CI-NEXT: v_mov_b32_e32 v3, 0 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_add_u32 s0, s0, 32 +; CI-NEXT: s_addc_u32 s1, s1, 0 +; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc +; CI-NEXT: s_endpgm +; +; VI-LABEL: flat_atomic_inc_noret_i64_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; VI-NEXT: v_mov_b32_e32 v2, 42 +; VI-NEXT: v_mov_b32_e32 v3, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s0, 32 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: flat_atomic_inc_noret_i64_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 42 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc +; GFX9-NEXT: s_endpgm %gep = getelementptr i64, i64* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) ret void @@ -1360,23 +1464,21 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* ; GFX9-LABEL: flat_atomic_inc_ret_i64_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 40, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_mov_b32_e32 v4, 42 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[2:3], v[4:5] glc +; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[4:5] offset:40 glc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, i64* %ptr, i32 %id @@ -1428,12 +1530,10 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 40, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 42 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc +; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] offset:40 glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, i64* %ptr, i32 %id diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll index 70651280003e5..4f0c1586cad19 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll @@ -16,13 +16,7 @@ define void @global_atomic_fadd_f32_off_2048(float addrspace(1)* %ptr, float %da ; GFX908-LABEL: global_atomic_fadd_f32_off_2048: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: s_movk_i32 s4, 0x800 -; GFX908-NEXT: s_mov_b32 s5, 0 -; GFX908-NEXT: v_mov_b32_e32 v3, s4 -; GFX908-NEXT: v_mov_b32_e32 v4, s5 -; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc -; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2048 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 @@ -34,13 +28,7 @@ define void @global_atomic_fadd_f32_off_neg2047(float addrspace(1)* %ptr, float ; GFX908-LABEL: global_atomic_fadd_f32_off_neg2047: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: s_mov_b32 s4, 0xfffff804 -; GFX908-NEXT: s_mov_b32 s5, -1 -; GFX908-NEXT: v_mov_b32_e32 v3, s4 -; GFX908-NEXT: v_mov_b32_e32 v4, s5 -; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc -; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2044 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr float, float addrspace(1)* %ptr, i64 -511 @@ -54,12 +42,10 @@ define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(float addrspace(1)* %pt ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX908-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) -; GFX908-NEXT: s_add_u32 s0, s0, 0x800 -; GFX908-NEXT: s_addc_u32 s1, s1, 0 ; GFX908-NEXT: v_mov_b32_e32 v0, s0 -; GFX908-NEXT: v_mov_b32_e32 v1, s1 ; GFX908-NEXT: v_mov_b32_e32 v2, s2 -; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX908-NEXT: v_mov_b32_e32 v1, s1 +; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2048 ; GFX908-NEXT: s_endpgm %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) @@ -81,13 +67,7 @@ define void @global_atomic_fadd_v2f16_off_neg2047(<2 x half> addrspace(1)* %ptr, ; GFX908-LABEL: global_atomic_fadd_v2f16_off_neg2047: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: s_mov_b32 s4, 0xfffff804 -; GFX908-NEXT: s_mov_b32 s5, -1 -; GFX908-NEXT: v_mov_b32_e32 v3, s4 -; GFX908-NEXT: v_mov_b32_e32 v4, s5 -; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc -; GFX908-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off +; GFX908-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2044 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -511 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index 291f40e4f22a7..ff6467afde03d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -18,54 +18,52 @@ define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) { ; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1: ; GFX9-NOUNALIGNED: ; %bb.0: ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 11, v0 -; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v0, v[0:1], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v[2:3], off offset:-10 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[2:3], off offset:-9 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v[2:3], off offset:-8 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[2:3], off offset:-7 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[2:3], off offset:-6 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[2:3], off offset:-5 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[2:3], off offset:-4 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[2:3], off offset:-3 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[2:3], off offset:-2 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[2:3], off offset:-1 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[2:3], off -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xff +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[0:1], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v[0:1], off offset:1 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[0:1], off offset:2 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v[0:1], off offset:3 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[0:1], off offset:4 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[0:1], off offset:5 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[0:1], off offset:6 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[0:1], off offset:7 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[0:1], off offset:8 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[0:1], off offset:9 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[0:1], off offset:10 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v0, v[0:1], off offset:11 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, 0xff ; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s4, 0xff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, 8 ; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s5, 8 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, 8 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v3, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) ; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s4, v4 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) ; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v2, s4, v3 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v7, s5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v8, v8, v3 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v8, v8, v1 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v9, v9, v3 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v4 +; 
GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v9, v9, v1 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v4 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v11, v13, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v12, v12, v3 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v12, v12, v1 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v2, v3 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v5 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v5, v6, v3, v7 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v5, v6, s4, v7 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v8 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v9 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v3, v10, v3, v11 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v12 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v4 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v8, v10, v1, v11 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v10, 24, v0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v12 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v4 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v7 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v3, v8, v2 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v10 ; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align1: @@ -156,28 +154,25 @@ define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) { ; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: ; GFX9-NOUNALIGNED: ; %bb.0: ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 10, v0 -; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v0, v[0:1], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v[2:3], off offset:-8 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[2:3], off offset:-6 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[2:3], off offset:-4 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[2:3], off offset:-2 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[2:3], off -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[0:1], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v[0:1], off offset:2 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[0:1], off offset:4 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[0:1], off offset:6 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[0:1], off offset:8 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v0, v[0:1], off offset:10 ; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v3 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v3 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s4, v5 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v2, v3 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v4, v3, v5 -; 
GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v6, v3, v2 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v0 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v2, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v4, s4, v3 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v6, s4, v5 ; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align2: From 65ba0cd3955f8c609ff314dc0cda7bc8ded4a083 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Wed, 23 Dec 2020 11:19:35 +0100 Subject: [PATCH 161/378] [mlir] Modernize std-to-llvm operation conversion doc This was long overdue. Replace the outdated type syntax with the new syntax, and update the description of how memref load/stores are handled to reflect the latest changes in the implementation. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D93555 --- mlir/docs/ConversionToLLVMDialect.md | 467 ----------------------- mlir/docs/LLVMDialectMemRefConvention.md | 439 +++++++++++++++++++++ 2 files changed, 439 insertions(+), 467 deletions(-) create mode 100644 mlir/docs/LLVMDialectMemRefConvention.md diff --git a/mlir/docs/ConversionToLLVMDialect.md b/mlir/docs/ConversionToLLVMDialect.md index 778eea6184c9e..2b5f98b376861 100644 --- a/mlir/docs/ConversionToLLVMDialect.md +++ b/mlir/docs/ConversionToLLVMDialect.md @@ -280,470 +280,3 @@ Examples: !llvm.func, ptr, i64)>, struct<(ptr, ptr, i64)>)> ()> ``` - -## Calling Convention for Standard Calls - - - -### Result Packing - -In case of multi-result functions, the returned values are inserted into a -structure-typed value before being returned and extracted from it at the call -site. This transformation is a part of the conversion and is transparent to the -defines and uses of the values being returned. - -Example: - -```mlir -func @foo(%arg0: i32, %arg1: i64) -> (i32, i64) { - return %arg0, %arg1 : i32, i64 -} -func @bar() { - %0 = constant 42 : i32 - %1 = constant 17 : i64 - %2:2 = call @foo(%0, %1) : (i32, i64) -> (i32, i64) - "use_i32"(%2#0) : (i32) -> () - "use_i64"(%2#1) : (i64) -> () -} - -// is transformed into - -func @foo(%arg0: !llvm.i32, %arg1: !llvm.i64) -> !llvm<"{i32, i64}"> { - // insert the vales into a structure - %0 = llvm.mlir.undef : !llvm<"{i32, i64}"> - %1 = llvm.insertvalue %arg0, %0[0] : !llvm<"{i32, i64}"> - %2 = llvm.insertvalue %arg1, %1[1] : !llvm<"{i32, i64}"> - - // return the structure value - llvm.return %2 : !llvm<"{i32, i64}"> -} -func @bar() { - %0 = llvm.mlir.constant(42 : i32) : !llvm.i32 - %1 = llvm.mlir.constant(17) : !llvm.i64 - - // call and extract the values from the structure - %2 = llvm.call @bar(%0, %1) : (%arg0: !llvm.i32, %arg1: !llvm.i32) -> !llvm<"{i32, i64}"> - %3 = llvm.extractvalue %2[0] : !llvm<"{i32, i64}"> - %4 = llvm.extractvalue %2[1] : !llvm<"{i32, i64}"> - - // use as before - "use_i32"(%3) : (!llvm.i32) -> () - "use_i64"(%4) : (!llvm.i64) -> () -} -``` - -### Calling Convention for Ranked `memref` - -Function _arguments_ of `memref` type, ranked or unranked, are _expanded_ into a -list of arguments of non-aggregate types that the memref descriptor defined -above comprises. That is, the outer struct type and the inner array types are -replaced with individual arguments. 
- -This convention is implemented in the conversion of `std.func` and `std.call` to -the LLVM dialect, with the former unpacking the descriptor into a set of -individual values and the latter packing those values back into a descriptor so -as to make it transparently usable by other operations. Conversions from other -dialects should take this convention into account. - -This specific convention is motivated by the necessity to specify alignment and -aliasing attributes on the raw pointers underpinning the memref. - -Examples: - -```mlir -func @foo(%arg0: memref) -> () { - "use"(%arg0) : (memref) -> () - return -} - -// Gets converted to the following. - -llvm.func @foo(%arg0: !llvm<"float*">, // Allocated pointer. - %arg1: !llvm<"float*">, // Aligned pointer. - %arg2: !llvm.i64, // Offset. - %arg3: !llvm.i64, // Size in dim 0. - %arg4: !llvm.i64) { // Stride in dim 0. - // Populate memref descriptor structure. - %0 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %1 = llvm.insertvalue %arg0, %0[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %2 = llvm.insertvalue %arg1, %1[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %3 = llvm.insertvalue %arg2, %2[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %5 = llvm.insertvalue %arg4, %4[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - - // Descriptor is now usable as a single value. - "use"(%5) : (!llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">) -> () - llvm.return -} -``` - -```mlir -func @bar() { - %0 = "get"() : () -> (memref) - call @foo(%0) : (memref) -> () - return -} - -// Gets converted to the following. - -llvm.func @bar() { - %0 = "get"() : () -> !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - - // Unpack the memref descriptor. - %1 = llvm.extractvalue %0[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %2 = llvm.extractvalue %0[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %3 = llvm.extractvalue %0[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %4 = llvm.extractvalue %0[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %5 = llvm.extractvalue %0[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - - // Pass individual values to the callee. - llvm.call @foo(%1, %2, %3, %4, %5) : (!llvm<"float*">, !llvm<"float*">, !llvm.i64, !llvm.i64, !llvm.i64) -> () - llvm.return -} - -``` - -### Calling Convention for Unranked `memref` - -For unranked memrefs, the list of function arguments always contains two -elements, same as the unranked memref descriptor: an integer rank, and a -type-erased (`!llvm<"i8*">`) pointer to the ranked memref descriptor. Note that -while the _calling convention_ does not require stack allocation, _casting_ to -unranked memref does since one cannot take an address of an SSA value containing -the ranked memref. The caller is in charge of ensuring the thread safety and -eventually removing unnecessary stack allocations in cast operations. - -Example - -```mlir -llvm.func @foo(%arg0: memref<*xf32>) -> () { - "use"(%arg0) : (memref<*xf32>) -> () - return -} - -// Gets converted to the following. - -llvm.func @foo(%arg0: !llvm.i64 // Rank. - %arg1: !llvm<"i8*">) { // Type-erased pointer to descriptor. - // Pack the unranked memref descriptor. 
- %0 = llvm.mlir.undef : !llvm<"{ i64, i8* }"> - %1 = llvm.insertvalue %arg0, %0[0] : !llvm<"{ i64, i8* }"> - %2 = llvm.insertvalue %arg1, %1[1] : !llvm<"{ i64, i8* }"> - - "use"(%2) : (!llvm<"{ i64, i8* }">) -> () - llvm.return -} -``` - -```mlir -llvm.func @bar() { - %0 = "get"() : () -> (memref<*xf32>) - call @foo(%0): (memref<*xf32>) -> () - return -} - -// Gets converted to the following. - -llvm.func @bar() { - %0 = "get"() : () -> (!llvm<"{ i64, i8* }">) - - // Unpack the memref descriptor. - %1 = llvm.extractvalue %0[0] : !llvm<"{ i64, i8* }"> - %2 = llvm.extractvalue %0[1] : !llvm<"{ i64, i8* }"> - - // Pass individual values to the callee. - llvm.call @foo(%1, %2) : (!llvm.i64, !llvm<"i8*">) - llvm.return -} -``` - -**Lifetime.** The second element of the unranked memref descriptor points to -some memory in which the ranked memref descriptor is stored. By convention, this -memory is allocated on stack and has the lifetime of the function. (*Note:* due -to function-length lifetime, creation of multiple unranked memref descriptors, -e.g., in a loop, may lead to stack overflows.) If an unranked descriptor has to -be returned from a function, the ranked descriptor it points to is copied into -dynamically allocated memory, and the pointer in the unranked descriptor is -updated accordingly. The allocation happens immediately before returning. It is -the responsibility of the caller to free the dynamically allocated memory. The -default conversion of `std.call` and `std.call_indirect` copies the ranked -descriptor to newly allocated memory on the caller's stack. Thus, the convention -of the ranked memref descriptor pointed to by an unranked memref descriptor -being stored on stack is respected. - -*This convention may or may not apply if the conversion of MemRef types is -overridden by the user.* - -### C-compatible wrapper emission - -In practical cases, it may be desirable to have externally-facing functions with -a single attribute corresponding to a MemRef argument. When interfacing with -LLVM IR produced from C, the code needs to respect the corresponding calling -convention. The conversion to the LLVM dialect provides an option to generate -wrapper functions that take memref descriptors as pointers-to-struct compatible -with data types produced by Clang when compiling C sources. The generation of -such wrapper functions can additionally be controlled at a function granularity -by setting the `llvm.emit_c_interface` unit attribute. - -More specifically, a memref argument is converted into a pointer-to-struct -argument of type `{T*, T*, i64, i64[N], i64[N]}*` in the wrapper function, where -`T` is the converted element type and `N` is the memref rank. This type is -compatible with that produced by Clang for the following C++ structure template -instantiations or their equivalents in C. - -```cpp -template -struct MemRefDescriptor { - T *allocated; - T *aligned; - intptr_t offset; - intptr_t sizes[N]; - intptr_t strides[N]; -}; -``` - -If enabled, the option will do the following. For _external_ functions declared -in the MLIR module. - -1. Declare a new function `_mlir_ciface_` where memref arguments - are converted to pointer-to-struct and the remaining arguments are converted - as usual. -1. Add a body to the original function (making it non-external) that - 1. allocates a memref descriptor, - 1. populates it, and - 1. passes the pointer to it into the newly declared interface function, then - 1. collects the result of the call and returns it to the caller. 
- -For (non-external) functions defined in the MLIR module. - -1. Define a new function `_mlir_ciface_` where memref arguments - are converted to pointer-to-struct and the remaining arguments are converted - as usual. -1. Populate the body of the newly defined function with IR that - 1. loads descriptors from pointers; - 1. unpacks descriptor into individual non-aggregate values; - 1. passes these values into the original function; - 1. collects the result of the call and returns it to the caller. - -Examples: - -```mlir - -func @qux(%arg0: memref) - -// Gets converted into the following. - -// Function with unpacked arguments. -llvm.func @qux(%arg0: !llvm<"float*">, %arg1: !llvm<"float*">, %arg2: !llvm.i64, - %arg3: !llvm.i64, %arg4: !llvm.i64, %arg5: !llvm.i64, - %arg6: !llvm.i64) { - // Populate memref descriptor (as per calling convention). - %0 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %1 = llvm.insertvalue %arg0, %0[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %2 = llvm.insertvalue %arg1, %1[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %3 = llvm.insertvalue %arg2, %2[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %5 = llvm.insertvalue %arg5, %4[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %6 = llvm.insertvalue %arg4, %5[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %7 = llvm.insertvalue %arg6, %6[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - - // Store the descriptor in a stack-allocated space. - %8 = llvm.mlir.constant(1 : index) : !llvm.i64 - %9 = llvm.alloca %8 x !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - : (!llvm.i64) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*"> - llvm.store %7, %9 : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*"> - - // Call the interface function. - llvm.call @_mlir_ciface_qux(%9) : (!llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*">) -> () - - // The stored descriptor will be freed on return. - llvm.return -} - -// Interface function. -llvm.func @_mlir_ciface_qux(!llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*">) -``` - -```mlir -func @foo(%arg0: memref) { - return -} - -// Gets converted into the following. - -// Function with unpacked arguments. -llvm.func @foo(%arg0: !llvm<"float*">, %arg1: !llvm<"float*">, %arg2: !llvm.i64, - %arg3: !llvm.i64, %arg4: !llvm.i64, %arg5: !llvm.i64, - %arg6: !llvm.i64) { - llvm.return -} - -// Interface function callable from C. -llvm.func @_mlir_ciface_foo(%arg0: !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*">) { - // Load the descriptor. - %0 = llvm.load %arg0 : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*"> - - // Unpack the descriptor as per calling convention. 
- %1 = llvm.extractvalue %0[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %2 = llvm.extractvalue %0[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %3 = llvm.extractvalue %0[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %4 = llvm.extractvalue %0[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %5 = llvm.extractvalue %0[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %6 = llvm.extractvalue %0[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - %7 = llvm.extractvalue %0[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - llvm.call @foo(%1, %2, %3, %4, %5, %6, %7) - : (!llvm<"float*">, !llvm<"float*">, !llvm.i64, !llvm.i64, !llvm.i64, - !llvm.i64, !llvm.i64) -> () - llvm.return -} -``` - -Rationale: Introducing auxiliary functions for C-compatible interfaces is -preferred to modifying the calling convention since it will minimize the effect -of C compatibility on intra-module calls or calls between MLIR-generated -functions. In particular, when calling external functions from an MLIR module in -a (parallel) loop, the fact of storing a memref descriptor on stack can lead to -stack exhaustion and/or concurrent access to the same address. Auxiliary -interface function serves as an allocation scope in this case. Furthermore, when -targeting accelerators with separate memory spaces such as GPUs, stack-allocated -descriptors passed by pointer would have to be transferred to the device memory, -which introduces significant overhead. In such situations, auxiliary interface -functions are executed on host and only pass the values through device function -invocation mechanism. - -## Repeated Successor Removal - -Since the goal of the LLVM IR dialect is to reflect LLVM IR in MLIR, the dialect -and the conversion procedure must account for the differences between block -arguments and LLVM IR PHI nodes. In particular, LLVM IR disallows PHI nodes with -different values coming from the same source. Therefore, the LLVM IR dialect -disallows operations that have identical successors accepting arguments, which -would lead to invalid PHI nodes. The conversion process resolves the potential -PHI source ambiguity by injecting dummy blocks if the same block is used more -than once as a successor in an instruction. These dummy blocks branch -unconditionally to the original successors, pass them the original operands -(available in the dummy block because it is dominated by the original block) and -are used instead of them in the original terminator operation. - -Example: - -```mlir - cond_br %0, ^bb1(%1 : i32), ^bb1(%2 : i32) -^bb1(%3 : i32) - "use"(%3) : (i32) -> () -``` - -leads to a new basic block being inserted, - -```mlir - cond_br %0, ^bb1(%1 : i32), ^dummy -^bb1(%3 : i32): - "use"(%3) : (i32) -> () -^dummy: - br ^bb1(%4 : i32) -``` - -before the conversion to the LLVM IR dialect: - -```mlir - llvm.cond_br %0, ^bb1(%1 : !llvm.i32), ^dummy -^bb1(%3 : !llvm<"i32">): - "use"(%3) : (!llvm.i32) -> () -^dummy: - llvm.br ^bb1(%2 : !llvm.i32) -``` - -## Default Memref Model - -### Memref Descriptor - -Within a converted function, a `memref`-typed value is represented by a memref -_descriptor_, the type of which is the structure type obtained by converting -from the memref type. This descriptor holds all the necessary information to -produce an address of a specific element. In particular, it holds dynamic values -for static sizes, and they are expected to match at all times. 
- -It is created by the allocation operation and is updated by the conversion -operations that may change static dimensions into dynamic dimensions and vice versa. - -**Note**: LLVM IR conversion does not support `memref`s with layouts that are -not amenable to the strided form. - -### Index Linearization - -Accesses to a memref element are transformed into an access to an element of the -buffer pointed to by the descriptor. The position of the element in the buffer -is calculated by linearizing memref indices in row-major order (lexically first -index is the slowest varying, similar to C, but accounting for strides). The -computation of the linear address is emitted as arithmetic operation in the LLVM -IR dialect. Strides are extracted from the memref descriptor. - -Accesses to zero-dimensional memref (that are interpreted as pointers to the -elemental type) are directly converted into `llvm.load` or `llvm.store` without -any pointer manipulations. - -Examples: - -An access to a zero-dimensional memref is converted into a plain load: - -```mlir -// before -%0 = load %m[] : memref - -// after -%0 = llvm.load %m : !llvm<"float*"> -``` - -An access to a memref with indices: - -```mlir -%0 = load %m[1,2,3,4] : memref<10x?x13x?xf32> -``` - -is transformed into the equivalent of the following code: - -```mlir -// Compute the linearized index from strides. Each block below extracts one -// stride from the descriptor, multiplies it with the index and accumulates -// the total offset. -%stride1 = llvm.extractvalue[4, 0] : !llvm<"{float*, float*, i64, i64[4], i64[4]}"> -%idx1 = llvm.mlir.constant(1 : index) !llvm.i64 -%addr1 = muli %stride1, %idx1 : !llvm.i64 - -%stride2 = llvm.extractvalue[4, 1] : !llvm<"{float*, float*, i64, i64[4], i64[4]}"> -%idx2 = llvm.mlir.constant(2 : index) !llvm.i64 -%addr2 = muli %stride2, %idx2 : !llvm.i64 -%addr3 = addi %addr1, %addr2 : !llvm.i64 - -%stride3 = llvm.extractvalue[4, 2] : !llvm<"{float*, float*, i64, i64[4], i64[4]}"> -%idx3 = llvm.mlir.constant(3 : index) !llvm.i64 -%addr4 = muli %stride3, %idx3 : !llvm.i64 -%addr5 = addi %addr3, %addr4 : !llvm.i64 - -%stride4 = llvm.extractvalue[4, 3] : !llvm<"{float*, float*, i64, i64[4], i64[4]}"> -%idx4 = llvm.mlir.constant(4 : index) !llvm.i64 -%addr6 = muli %stride4, %idx4 : !llvm.i64 -%addr7 = addi %addr5, %addr6 : !llvm.i64 - -// Add the linear offset to the address. -%offset = llvm.extractvalue[2] : !llvm<"{float*, float*, i64, i64[4], i64[4]}"> -%addr8 = addi %addr7, %offset : !llvm.i64 - -// Obtain the aligned pointer. -%aligned = llvm.extractvalue[1] : !llvm<"{float*, float*, i64, i64[4], i64[4]}"> - -// Get the address of the data pointer. -%ptr = llvm.getelementptr %aligned[%addr8] - : !llvm<"{float*, float*, i64, i64[4], i64[4]}"> -> !llvm<"float*"> - -// Perform the actual load. -%0 = llvm.load %ptr : !llvm<"float*"> -``` - -For stores, the address computation code is identical and only the actual store -operation is different. - -Note: the conversion does not perform any sort of common subexpression -elimination when emitting memref accesses. 
diff --git a/mlir/docs/LLVMDialectMemRefConvention.md b/mlir/docs/LLVMDialectMemRefConvention.md new file mode 100644 index 0000000000000..94ca718bd744a --- /dev/null +++ b/mlir/docs/LLVMDialectMemRefConvention.md @@ -0,0 +1,439 @@ +# Built-in Function and MemRef Calling Convention + +This documents describes the calling convention implemented in the conversion of +built-in [function operation](LangRef.md#functions), standard +[`call`](Dialects/Standard.md#stdcall-callop) operations and the handling of +[`memref`](LangRef.md#memref-type) type equivalents in the +[LLVM dialect](Dialects/LLVM.md). The conversion assumes the _default_ +convention was used when converting +[built-in to the LLVM dialect types](ConversionToLLVMDialect.md). + +## Function Result Packing + +In case of multi-result functions, the returned values are inserted into a +structure-typed value before being returned and extracted from it at the call +site. This transformation is a part of the conversion and is transparent to the +defines and uses of the values being returned. + +Example: + +```mlir +func @foo(%arg0: i32, %arg1: i64) -> (i32, i64) { + return %arg0, %arg1 : i32, i64 +} +func @bar() { + %0 = constant 42 : i32 + %1 = constant 17 : i64 + %2:2 = call @foo(%0, %1) : (i32, i64) -> (i32, i64) + "use_i32"(%2#0) : (i32) -> () + "use_i64"(%2#1) : (i64) -> () +} + +// is transformed into + +llvm.func @foo(%arg0: !llvm.i32, %arg1: !llvm.i64) -> !llvm.struct<(i32, i64)> { + // insert the vales into a structure + %0 = llvm.mlir.undef : !llvm.struct<(i32, i64)> + %1 = llvm.insertvalue %arg0, %0[0] : !llvm.struct<(i32, i64)> + %2 = llvm.insertvalue %arg1, %1[1] : !llvm.struct<(i32, i64)> + + // return the structure value + llvm.return %2 : !llvm.struct<(i32, i64)> +} +llvm.func @bar() { + %0 = llvm.mlir.constant(42 : i32) : !llvm.i32 + %1 = llvm.mlir.constant(17) : !llvm.i64 + + // call and extract the values from the structure + %2 = llvm.call @bar(%0, %1) + : (!llvm.i32, !llvm.i32) -> !llvm.struct<(i32, i64)> + %3 = llvm.extractvalue %2[0] : !llvm.struct<(i32, i64)> + %4 = llvm.extractvalue %2[1] : !llvm.struct<(i32, i64)> + + // use as before + "use_i32"(%3) : (!llvm.i32) -> () + "use_i64"(%4) : (!llvm.i64) -> () +} +``` + +## Calling Convention for Ranked `memref` + +Function _arguments_ of `memref` type, ranked or unranked, are _expanded_ into a +list of arguments of non-aggregate types that the memref descriptor defined +above comprises. That is, the outer struct type and the inner array types are +replaced with individual arguments. + +This convention is implemented in the conversion of `std.func` and `std.call` to +the LLVM dialect, with the former unpacking the descriptor into a set of +individual values and the latter packing those values back into a descriptor so +as to make it transparently usable by other operations. Conversions from other +dialects should take this convention into account. + +This specific convention is motivated by the necessity to specify alignment and +aliasing attributes on the raw pointers underpinning the memref. + +Examples: + +```mlir +func @foo(%arg0: memref) -> () { + "use"(%arg0) : (memref) -> () + return +} + +// Gets converted to the following +// (using type alias for brevity): +!llvm.memref_1d = type !llvm.struct<(ptr, ptr, i64, + array<1xi64>, array<1xi64>)> + +llvm.func @foo(%arg0: !llvm.ptr, // Allocated pointer. + %arg1: !llvm.ptr, // Aligned pointer. + %arg2: !llvm.i64, // Offset. + %arg3: !llvm.i64, // Size in dim 0. + %arg4: !llvm.i64) { // Stride in dim 0. 
+ // Populate memref descriptor structure. + %0 = llvm.mlir.undef : + %1 = llvm.insertvalue %arg0, %0[0] : !llvm.memref_1d + %2 = llvm.insertvalue %arg1, %1[1] : !llvm.memref_1d + %3 = llvm.insertvalue %arg2, %2[2] : !llvm.memref_1d + %4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm.memref_1d + %5 = llvm.insertvalue %arg4, %4[4, 0] : !llvm.memref_1d + + // Descriptor is now usable as a single value. + "use"(%5) : (!llvm.memref_1d) -> () + llvm.return +} +``` + +```mlir +func @bar() { + %0 = "get"() : () -> (memref) + call @foo(%0) : (memref) -> () + return +} + +// Gets converted to the following +// (using type alias for brevity): +!llvm.memref_1d = type !llvm.struct<(ptr, ptr, i64, + array<1xi64>, array<1xi64>)> + +llvm.func @bar() { + %0 = "get"() : () -> !llvm.memref_1d + + // Unpack the memref descriptor. + %1 = llvm.extractvalue %0[0] : !llvm.memref_1d + %2 = llvm.extractvalue %0[1] : !llvm.memref_1d + %3 = llvm.extractvalue %0[2] : !llvm.memref_1d + %4 = llvm.extractvalue %0[3, 0] : !llvm.memref_1d + %5 = llvm.extractvalue %0[4, 0] : !llvm.memref_1d + + // Pass individual values to the callee. + llvm.call @foo(%1, %2, %3, %4, %5) : (!llvm.memref_1d) -> () + llvm.return +} + +``` + +## Calling Convention for Unranked `memref` + +For unranked memrefs, the list of function arguments always contains two +elements, same as the unranked memref descriptor: an integer rank, and a +type-erased (`!llvm<"i8*">`) pointer to the ranked memref descriptor. Note that +while the _calling convention_ does not require stack allocation, _casting_ to +unranked memref does since one cannot take an address of an SSA value containing +the ranked memref. The caller is in charge of ensuring the thread safety and +eventually removing unnecessary stack allocations in cast operations. + +Example + +```mlir +llvm.func @foo(%arg0: memref<*xf32>) -> () { + "use"(%arg0) : (memref<*xf32>) -> () + return +} + +// Gets converted to the following. + +llvm.func @foo(%arg0: !llvm.i64 // Rank. + %arg1: !llvm.ptr) { // Type-erased pointer to descriptor. + // Pack the unranked memref descriptor. + %0 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %1 = llvm.insertvalue %arg0, %0[0] : !llvm.struct<(i64, ptr)> + %2 = llvm.insertvalue %arg1, %1[1] : !llvm.struct<(i64, ptr)> + + "use"(%2) : (!llvm.struct<(i64, ptr)>) -> () + llvm.return +} +``` + +```mlir +llvm.func @bar() { + %0 = "get"() : () -> (memref<*xf32>) + call @foo(%0): (memref<*xf32>) -> () + return +} + +// Gets converted to the following. + +llvm.func @bar() { + %0 = "get"() : () -> (!llvm.struct<(i64, ptr)>) + + // Unpack the memref descriptor. + %1 = llvm.extractvalue %0[0] : !llvm.struct<(i64, ptr)> + %2 = llvm.extractvalue %0[1] : !llvm.struct<(i64, ptr)> + + // Pass individual values to the callee. + llvm.call @foo(%1, %2) : (!llvm.i64, !llvm.ptr) + llvm.return +} +``` + +**Lifetime.** The second element of the unranked memref descriptor points to +some memory in which the ranked memref descriptor is stored. By convention, this +memory is allocated on stack and has the lifetime of the function. (*Note:* due +to function-length lifetime, creation of multiple unranked memref descriptors, +e.g., in a loop, may lead to stack overflows.) If an unranked descriptor has to +be returned from a function, the ranked descriptor it points to is copied into +dynamically allocated memory, and the pointer in the unranked descriptor is +updated accordingly. The allocation happens immediately before returning. 
It is +the responsibility of the caller to free the dynamically allocated memory. The +default conversion of `std.call` and `std.call_indirect` copies the ranked +descriptor to newly allocated memory on the caller's stack. Thus, the convention +of the ranked memref descriptor pointed to by an unranked memref descriptor +being stored on stack is respected. + +*This convention may or may not apply if the conversion of MemRef types is +overridden by the user.* + +## C-compatible wrapper emission + +In practical cases, it may be desirable to have externally-facing functions with +a single attribute corresponding to a MemRef argument. When interfacing with +LLVM IR produced from C, the code needs to respect the corresponding calling +convention. The conversion to the LLVM dialect provides an option to generate +wrapper functions that take memref descriptors as pointers-to-struct compatible +with data types produced by Clang when compiling C sources. The generation of +such wrapper functions can additionally be controlled at a function granularity +by setting the `llvm.emit_c_interface` unit attribute. + +More specifically, a memref argument is converted into a pointer-to-struct +argument of type `{T*, T*, i64, i64[N], i64[N]}*` in the wrapper function, where +`T` is the converted element type and `N` is the memref rank. This type is +compatible with that produced by Clang for the following C++ structure template +instantiations or their equivalents in C. + +```cpp +template +struct MemRefDescriptor { + T *allocated; + T *aligned; + intptr_t offset; + intptr_t sizes[N]; + intptr_t strides[N]; +}; +``` + +If enabled, the option will do the following. For _external_ functions declared +in the MLIR module. + +1. Declare a new function `_mlir_ciface_` where memref arguments + are converted to pointer-to-struct and the remaining arguments are converted + as usual. +1. Add a body to the original function (making it non-external) that + 1. allocates a memref descriptor, + 1. populates it, and + 1. passes the pointer to it into the newly declared interface function, + then + 1. collects the result of the call and returns it to the caller. + +For (non-external) functions defined in the MLIR module. + +1. Define a new function `_mlir_ciface_` where memref arguments + are converted to pointer-to-struct and the remaining arguments are converted + as usual. +1. Populate the body of the newly defined function with IR that + 1. loads descriptors from pointers; + 1. unpacks descriptor into individual non-aggregate values; + 1. passes these values into the original function; + 1. collects the result of the call and returns it to the caller. + +Examples: + +```mlir + +func @qux(%arg0: memref) + +// Gets converted into the following +// (using type alias for brevity): +!llvm.memref_2d = type !llvm.struct<(ptr, ptr, i64, + array<2xi64>, array<2xi64>)> + +// Function with unpacked arguments. +llvm.func @qux(%arg0: !llvm.ptr, %arg1: !llvm.ptr, + %arg2: !llvm.i64, %arg3: !llvm.i64, %arg4: !llvm.i64, + %arg5: !llvm.i64, %arg6: !llvm.i64) { + // Populate memref descriptor (as per calling convention). 
+ %0 = llvm.mlir.undef : !llvm.memref_2d + %1 = llvm.insertvalue %arg0, %0[0] : !llvm.memref_2d + %2 = llvm.insertvalue %arg1, %1[1] : !llvm.memref_2d + %3 = llvm.insertvalue %arg2, %2[2] : !llvm.memref_2d + %4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm.memref_2d + %5 = llvm.insertvalue %arg5, %4[4, 0] : !llvm.memref_2d + %6 = llvm.insertvalue %arg4, %5[3, 1] : !llvm.memref_2d + %7 = llvm.insertvalue %arg6, %6[4, 1] : !llvm.memref_2d + + // Store the descriptor in a stack-allocated space. + %8 = llvm.mlir.constant(1 : index) : !llvm.i64 + %9 = llvm.alloca %8 x !llvm.memref_2d + : (!llvm.i64) -> !llvm.ptr, ptr, i64, + array<2xi64>, array<2xi64>)>> + llvm.store %7, %9 : !llvm.ptr, ptr, i64, + array<2xi64>, array<2xi64>)>> + + // Call the interface function. + llvm.call @_mlir_ciface_qux(%9) + : (!llvm.ptr, ptr, i64, + array<2xi64>, array<2xi64>)>>) -> () + + // The stored descriptor will be freed on return. + llvm.return +} + +// Interface function. +llvm.func @_mlir_ciface_qux(!llvm.ptr, ptr, i64, + array<2xi64>, array<2xi64>)>>) +``` + +```mlir +func @foo(%arg0: memref) { + return +} + +// Gets converted into the following +// (using type alias for brevity): +!llvm.memref_2d = type !llvm.struct<(ptr, ptr, i64, + array<2xi64>, array<2xi64>)> +!llvm.memref_2d_ptr = type !llvm.ptr, ptr, i64, + array<2xi64>, array<2xi64>)>> + +// Function with unpacked arguments. +llvm.func @foo(%arg0: !llvm.ptr, %arg1: !llvm.ptr, + %arg2: !llvm.i64, %arg3: !llvm.i64, %arg4: !llvm.i64, + %arg5: !llvm.i64, %arg6: !llvm.i64) { + llvm.return +} + +// Interface function callable from C. +llvm.func @_mlir_ciface_foo(%arg0: !llvm.memref_2d_ptr) { + // Load the descriptor. + %0 = llvm.load %arg0 : !llvm.memref_2d_ptr + + // Unpack the descriptor as per calling convention. + %1 = llvm.extractvalue %0[0] : !llvm.memref_2d + %2 = llvm.extractvalue %0[1] : !llvm.memref_2d + %3 = llvm.extractvalue %0[2] : !llvm.memref_2d + %4 = llvm.extractvalue %0[3, 0] : !llvm.memref_2d + %5 = llvm.extractvalue %0[3, 1] : !llvm.memref_2d + %6 = llvm.extractvalue %0[4, 0] : !llvm.memref_2d + %7 = llvm.extractvalue %0[4, 1] : !llvm.memref_2d + llvm.call @foo(%1, %2, %3, %4, %5, %6, %7) + : (!llvm.ptr, !llvm.ptr, !llvm.i64, !llvm.i64, !llvm.i64, + !llvm.i64, !llvm.i64) -> () + llvm.return +} +``` + +Rationale: Introducing auxiliary functions for C-compatible interfaces is +preferred to modifying the calling convention since it will minimize the effect +of C compatibility on intra-module calls or calls between MLIR-generated +functions. In particular, when calling external functions from an MLIR module in +a (parallel) loop, the fact of storing a memref descriptor on stack can lead to +stack exhaustion and/or concurrent access to the same address. Auxiliary +interface function serves as an allocation scope in this case. Furthermore, when +targeting accelerators with separate memory spaces such as GPUs, stack-allocated +descriptors passed by pointer would have to be transferred to the device memory, +which introduces significant overhead. In such situations, auxiliary interface +functions are executed on host and only pass the values through device function +invocation mechanism. + +## Default Memref Model + +### Memref Descriptor + +Within a converted function, a `memref`-typed value is represented by a memref +_descriptor_, the type of which is the structure type obtained by converting +from the memref type. This descriptor holds all the necessary information to +produce an address of a specific element. 
In particular, it holds dynamic values +for static sizes, and they are expected to match at all times. + +It is created by the allocation operation and is updated by the conversion +operations that may change static dimensions into dynamic dimensions and vice +versa. + +**Note**: LLVM IR conversion does not support `memref`s with layouts that are +not amenable to the strided form. + +### Index Linearization + +Accesses to a memref element are transformed into an access to an element of the +buffer pointed to by the descriptor. The position of the element in the buffer +is calculated by linearizing memref indices in row-major order (lexically first +index is the slowest varying, similar to C, but accounting for strides). The +computation of the linear address is emitted as arithmetic operation in the LLVM +IR dialect. Strides are extracted from the memref descriptor. + +Examples: + +An access to a memref with indices: + +```mlir +%0 = load %m[%1,%2,%3,%4] : memref +``` + +is transformed into the equivalent of the following code: + +```mlir +// Compute the linearized index from strides. +// When strides or, in absence of explicit strides, the corresponding sizes are +// dynamic, extract the stride value from the descriptor. +%stride1 = llvm.extractvalue[4, 0] : !llvm.struct<(ptr, ptr, i64, + array<4xi64>, array<4xi64>)> +%addr1 = muli %stride1, %1 : !llvm.i64 + +// When the stride or, in absence of explicit strides, the trailing sizes are +// known statically, this value is used as a constant. The natural value of +// strides is the product of all sizes following the current dimension. +%stride2 = llvm.mlir.constant(32 : index) : !llvm.i64 +%addr2 = muli %stride2, %2 : !llvm.i64 +%addr3 = addi %addr1, %addr2 : !llvm.i64 + +%stride3 = llvm.mlir.constant(8 : index) : !llvm.i64 +%addr4 = muli %stride3, %3 : !llvm.i64 +%addr5 = addi %addr3, %addr4 : !llvm.i64 + +// Multiplication with the known unit stride can be omitted. +%addr6 = addi %addr5, %4 : !llvm.i64 + +// If the linear offset is known to be zero, it can also be omitted. If it is +// dynamic, it is extracted from the descriptor. +%offset = llvm.extractvalue[2] : !llvm.struct<(ptr, ptr, i64, + array<4xi64>, array<4xi64>)> +%addr7 = addi %addr6, %offset : !llvm.i64 + +// All accesses are based on the aligned pointer. +%aligned = llvm.extractvalue[1] : !llvm.struct<(ptr, ptr, i64, + array<4xi64>, array<4xi64>)> + +// Get the address of the data pointer. +%ptr = llvm.getelementptr %aligned[%addr8] + : !llvm.struct<(ptr, ptr, i64, array<4xi64>, array<4xi64>)> + -> !llvm.ptr + +// Perform the actual load. +%0 = llvm.load %ptr : !llvm.ptr +``` + +For stores, the address computation code is identical and only the actual store +operation is different. + +Note: the conversion does not perform any sort of common subexpression +elimination when emitting memref accesses. From 8451d4872ed70d307e21bc437d51ecf9be0cdd08 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 23 Dec 2020 10:51:23 +0100 Subject: [PATCH 162/378] [mlir] NFC: Remove ConvertToLLVMPattern::getDataPtr(). All call sites use getStridedElementPtr() now. 
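Because the removed helper did nothing but forward its arguments, the migration at each call
site is a mechanical rename. The following is a hedged sketch, not code from this patch: the
pattern class `ExampleLowering` and the method `materializeElementPtr` are hypothetical, and
only the two helper names come from the API being changed.

```cpp
// Sketch only: assumes the declarations from ConvertStandardToLLVM.h are in scope.
struct ExampleLowering : public ConvertToLLVMPattern {
  using ConvertToLLVMPattern::ConvertToLLVMPattern;

  Value materializeElementPtr(Location loc, MemRefType type, Value memRefDesc,
                              ValueRange indices,
                              ConversionPatternRewriter &rewriter) const {
    // Before: the removed forwarder.
    //   return getDataPtr(loc, type, memRefDesc, indices, rewriter);
    // After: call the surviving helper directly; the argument list is
    // unchanged because getDataPtr merely forwarded it.
    return getStridedElementPtr(loc, type, memRefDesc, indices, rewriter);
  }
};
```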
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D93751 --- .../mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h | 5 ----- mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp | 6 ------ 2 files changed, 11 deletions(-) diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 63ff16a84ab87..63547db56e6dc 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -512,11 +512,6 @@ class ConvertToLLVMPattern : public ConversionPattern { ValueRange indices, ConversionPatternRewriter &rewriter) const; - // Forwards to getStridedElementPtr. TODO: remove. - Value getDataPtr(Location loc, MemRefType type, Value memRefDesc, - ValueRange indices, - ConversionPatternRewriter &rewriter) const; - /// Returns if the givem memref type is supported. bool isSupportedMemRefType(MemRefType type) const; diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index e37e7e2dc0c11..97e763fc08c4e 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1085,12 +1085,6 @@ Value ConvertToLLVMPattern::getStridedElementPtr( : base; } -Value ConvertToLLVMPattern::getDataPtr( - Location loc, MemRefType type, Value memRefDesc, ValueRange indices, - ConversionPatternRewriter &rewriter) const { - return getStridedElementPtr(loc, type, memRefDesc, indices, rewriter); -} - // Check if the MemRefType `type` is supported by the lowering. We currently // only support memrefs with identity maps. bool ConvertToLLVMPattern::isSupportedMemRefType(MemRefType type) const { From 32a884c9c52c1216d57835e557233b238d601726 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 8 Dec 2020 16:45:19 +0100 Subject: [PATCH 163/378] [mlir] Add translation of omp.wsloop to LLVM IR Introduce a translation of OpenMP workshare loop construct to LLVM IR. This is a minimalist version to enable the pipeline and currently only supports static loop schedule (default in the specification) on non-collapsed loops. Other features will be added on per-need basis. Reviewed By: kiranchandramohan Differential Revision: https://reviews.llvm.org/D92055 --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 5 + .../mlir/Target/LLVMIR/ModuleTranslation.h | 3 + mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 127 ++++++++++++++++++ mlir/test/Target/openmp-llvm.mlir | 34 ++++- 4 files changed, 168 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index f915afcf32c95..6c6230f0c2e8a 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -185,6 +185,11 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments]> { ]; let regions = (region AnyRegion:$region); + + let extraClassDeclaration = [{ + /// Returns the number of loops in the workshape loop nest. 
+ unsigned getNumLoops() { return lowerBound().size(); } + }]; } def YieldOp : OpenMP_Op<"yield", [NoSideEffect, ReturnLike, Terminator, diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index d3d289414b382..5259ed7fe182c 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -100,6 +100,9 @@ class ModuleTranslation { llvm::BasicBlock &continuationIP, llvm::IRBuilder<> &builder, LogicalResult &bodyGenStatus); + virtual LogicalResult convertOmpWsLoop(Operation &opInst, + llvm::IRBuilder<> &builder); + /// Converts the type from MLIR LLVM dialect to LLVM. llvm::Type *convertType(LLVMType type); diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index ae0745b0be28f..0b2cf7de270fa 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -536,6 +536,126 @@ LogicalResult ModuleTranslation::convertOmpMaster(Operation &opInst, return success(); } +/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. +LogicalResult ModuleTranslation::convertOmpWsLoop(Operation &opInst, + llvm::IRBuilder<> &builder) { + auto loop = cast(opInst); + // TODO: this should be in the op verifier instead. + if (loop.lowerBound().empty()) + return failure(); + + if (loop.getNumLoops() != 1) + return opInst.emitOpError("collapsed loops not yet supported"); + + if (loop.schedule_val().hasValue() && + omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue()) != + omp::ClauseScheduleKind::Static) + return opInst.emitOpError( + "only static (default) loop schedule is currently supported"); + + llvm::Function *func = builder.GetInsertBlock()->getParent(); + llvm::LLVMContext &llvmContext = llvmModule->getContext(); + + // Find the loop configuration. + llvm::Value *lowerBound = valueMapping.lookup(loop.lowerBound()[0]); + llvm::Value *upperBound = valueMapping.lookup(loop.upperBound()[0]); + llvm::Value *step = valueMapping.lookup(loop.step()[0]); + llvm::Type *ivType = step->getType(); + llvm::Value *chunk = loop.schedule_chunk_var() + ? valueMapping[loop.schedule_chunk_var()] + : llvm::ConstantInt::get(ivType, 1); + + // Set up the source location value for OpenMP runtime. + llvm::DISubprogram *subprogram = + builder.GetInsertBlock()->getParent()->getSubprogram(); + const llvm::DILocation *diLoc = + debugTranslation->translateLoc(opInst.getLoc(), subprogram); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), + llvm::DebugLoc(diLoc)); + + // Generator of the canonical loop body. Produces an SESE region of basic + // blocks. + // TODO: support error propagation in OpenMPIRBuilder and use it instead of + // relying on captured variables. + LogicalResult bodyGenStatus = success(); + auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { + llvm::IRBuilder<>::InsertPointGuard guard(builder); + + // Make sure further conversions know about the induction variable. + valueMapping[loop.getRegion().front().getArgument(0)] = iv; + + llvm::BasicBlock *entryBlock = ip.getBlock(); + llvm::BasicBlock *exitBlock = + entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); + + // Convert the body of the loop. 
+ Region ®ion = loop.region(); + for (Block &bb : region) { + llvm::BasicBlock *llvmBB = + llvm::BasicBlock::Create(llvmContext, "omp.wsloop.region", func); + blockMapping[&bb] = llvmBB; + + // Retarget the branch of the entry block to the entry block of the + // converted region (regions are single-entry). + if (bb.isEntryBlock()) { + auto *branch = cast(entryBlock->getTerminator()); + branch->setSuccessor(0, llvmBB); + } + } + + // Block conversion creates a new IRBuilder every time so need not bother + // about maintaining the insertion point. + llvm::SetVector blocks = topologicalSort(region); + for (Block *bb : blocks) { + if (failed(convertBlock(*bb, bb->isEntryBlock()))) { + bodyGenStatus = failure(); + return; + } + + // Special handling for `omp.yield` terminators (we may have more than + // one): they return the control to the parent WsLoop operation so replace + // them with the branch to the exit block. We handle this here to avoid + // relying inter-function communication through the ModuleTranslation + // class to set up the correct insertion point. This is also consistent + // with MLIR's idiom of handling special region terminators in the same + // code that handles the region-owning operation. + if (isa(bb->getTerminator())) { + llvm::BasicBlock *llvmBB = blockMapping[bb]; + builder.SetInsertPoint(llvmBB, llvmBB->end()); + builder.CreateBr(exitBlock); + } + } + + connectPHINodes(region, valueMapping, blockMapping, branchMapping); + }; + + // Delegate actual loop construction to the OpenMP IRBuilder. + // TODO: this currently assumes WsLoop is semantically similar to SCF loop, + // i.e. it has a positive step, uses signed integer semantics, and its upper + // bound is not included. Reconsider this code when WsLoop clearly supports + // more cases. + llvm::BasicBlock *insertBlock = builder.GetInsertBlock(); + llvm::CanonicalLoopInfo *loopInfo = ompBuilder->createCanonicalLoop( + ompLoc, bodyGen, lowerBound, upperBound, step, /*IsSigned=*/true, + /*InclusiveStop=*/false); + if (failed(bodyGenStatus)) + return failure(); + + // TODO: get the alloca insertion point from the parallel operation builder. + // If we insert the at the top of the current function, they will be passed as + // extra arguments into the function the parallel operation builder outlines. + // Put them at the start of the current block for now. + llvm::OpenMPIRBuilder::InsertPointTy allocaIP( + insertBlock, insertBlock->getFirstInsertionPt()); + loopInfo = ompBuilder->createStaticWorkshareLoop( + ompLoc, loopInfo, allocaIP, + !loop.nowait().hasValue() || loop.nowait().getValue(), chunk); + + // Continue building IR after the loop. + builder.restoreIP(loopInfo->getAfterIP()); + return success(); +} + /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). LogicalResult @@ -577,6 +697,13 @@ ModuleTranslation::convertOmpOperation(Operation &opInst, .Case( [&](omp::ParallelOp) { return convertOmpParallel(opInst, builder); }) .Case([&](omp::MasterOp) { return convertOmpMaster(opInst, builder); }) + .Case([&](omp::WsLoopOp) { return convertOmpWsLoop(opInst, builder); }) + .Case([&](omp::YieldOp op) { + // Yields are loop terminators that can be just omitted. The loop + // structure was created in the function that handles WsLoopOp. 
+ assert(op.getNumOperands() == 0 && "unexpected yield with operands"); + return success(); + }) .Default([&](Operation *inst) { return inst->emitError("unsupported OpenMP operation: ") << inst->getName(); diff --git a/mlir/test/Target/openmp-llvm.mlir b/mlir/test/Target/openmp-llvm.mlir index 0651c6f5df408..c5ef16f4393d0 100644 --- a/mlir/test/Target/openmp-llvm.mlir +++ b/mlir/test/Target/openmp-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s // CHECK-LABEL: define void @test_stand_alone_directives() llvm.func @test_stand_alone_directives() { @@ -291,3 +291,35 @@ llvm.func @test_omp_master() -> () { } llvm.return } + +// ----- + +// CHECK: %struct.ident_t = type +// CHECK: @[[$parallel_loc:.*]] = private unnamed_addr constant {{.*}} c";LLVMDialectModule;wsloop_simple;{{[0-9]+}};{{[0-9]+}};;\00" +// CHECK: @[[$parallel_loc_struct:.*]] = private unnamed_addr constant %struct.ident_t {{.*}} @[[$parallel_loc]], {{.*}} + +// CHECK: @[[$wsloop_loc:.*]] = private unnamed_addr constant {{.*}} c";LLVMDialectModule;wsloop_simple;{{[0-9]+}};{{[0-9]+}};;\00" +// CHECK: @[[$wsloop_loc_struct:.*]] = private unnamed_addr constant %struct.ident_t {{.*}} @[[$wsloop_loc]], {{.*}} + +// CHECK-LABEL: @wsloop_simple +llvm.func @wsloop_simple(%arg0: !llvm.ptr) { + %0 = llvm.mlir.constant(42 : index) : !llvm.i64 + %1 = llvm.mlir.constant(10 : index) : !llvm.i64 + %2 = llvm.mlir.constant(1 : index) : !llvm.i64 + omp.parallel { + "omp.wsloop"(%1, %0, %2) ( { + ^bb0(%arg1: !llvm.i64): + // The form of the emitted IR is controlled by OpenMPIRBuilder and + // tested there. Just check that the right functions are called. + // CHECK: call i32 @__kmpc_global_thread_num + // CHECK: call void @__kmpc_for_static_init_{{.*}}(%struct.ident_t* @[[$wsloop_loc_struct]], + %3 = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float + %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %3, %4 : !llvm.ptr + omp.yield + // CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @[[$wsloop_loc_struct]], + }) {operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0]> : vector<9xi32>} : (!llvm.i64, !llvm.i64, !llvm.i64) -> () + omp.terminator + } + llvm.return +} From 19a0d0a40ce991836d930ecf8614ad21a1c3c32c Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 23 Dec 2020 11:37:49 +0100 Subject: [PATCH 164/378] [mlir] Rename ConvertToLLVMPattern::isSupportedMemRefType() to isConvertibleAndHasIdentityMaps(). Reviewed By: ftynse, herhut Differential Revision: https://reviews.llvm.org/D93752 --- .../StandardToLLVM/ConvertStandardToLLVM.h | 5 +++-- .../GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp | 4 ++-- .../Conversion/StandardToLLVM/StandardToLLVM.cpp | 13 +++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 63547db56e6dc..d5c1e923fab98 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -512,8 +512,9 @@ class ConvertToLLVMPattern : public ConversionPattern { ValueRange indices, ConversionPatternRewriter &rewriter) const; - /// Returns if the givem memref type is supported. 
- bool isSupportedMemRefType(MemRefType type) const; + /// Returns if the given memref has identity maps and the element type is + /// convertible to LLVM. + bool isConvertibleAndHasIdentityMaps(MemRefType type) const; /// Returns the type of a pointer to an element of the memref. Type getElementPtrType(MemRefType type) const; diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index bbb2bf1e04ff2..d35aa0346f743 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -369,7 +369,7 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite( MemRefType memRefType = allocOp.getType(); if (failed(areAllLLVMTypes(allocOp, operands, rewriter)) || - !isSupportedMemRefType(memRefType) || + !isConvertibleAndHasIdentityMaps(memRefType) || failed(isAsyncWithOneDependency(rewriter, allocOp))) return failure(); @@ -670,7 +670,7 @@ LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite( auto memRefType = memcpyOp.src().getType().cast(); if (failed(areAllLLVMTypes(memcpyOp, operands, rewriter)) || - !isSupportedMemRefType(memRefType) || + !isConvertibleAndHasIdentityMaps(memRefType) || failed(isAsyncWithOneDependency(rewriter, memcpyOp))) return failure(); diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 97e763fc08c4e..f4d1df81565bc 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1087,7 +1087,8 @@ Value ConvertToLLVMPattern::getStridedElementPtr( // Check if the MemRefType `type` is supported by the lowering. We currently // only support memrefs with identity maps. 
-bool ConvertToLLVMPattern::isSupportedMemRefType(MemRefType type) const { +bool ConvertToLLVMPattern::isConvertibleAndHasIdentityMaps( + MemRefType type) const { if (!typeConverter->convertType(type.getElementType())) return false; return type.getAffineMaps().empty() || @@ -1105,7 +1106,7 @@ void ConvertToLLVMPattern::getMemRefDescriptorSizes( Location loc, MemRefType memRefType, ArrayRef dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl &sizes, SmallVectorImpl &strides, Value &sizeBytes) const { - assert(isSupportedMemRefType(memRefType) && + assert(isConvertibleAndHasIdentityMaps(memRefType) && "layout maps must have been normalized away"); sizes.reserve(memRefType.getRank()); @@ -1977,7 +1978,7 @@ struct AllocLikeOpLowering : public ConvertToLLVMPattern { LogicalResult match(Operation *op) const override { MemRefType memRefType = getMemRefResultType(op); - return success(isSupportedMemRefType(memRefType)); + return success(isConvertibleAndHasIdentityMaps(memRefType)); } // An `alloc` is converted into a definition of a memref descriptor value and @@ -2411,7 +2412,7 @@ struct GlobalMemrefOpLowering : public ConvertOpToLLVMPattern { matchAndRewrite(GlobalMemrefOp global, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { MemRefType type = global.type().cast(); - if (!isSupportedMemRefType(type)) + if (!isConvertibleAndHasIdentityMaps(type)) return failure(); LLVM::LLVMType arrayTy = @@ -3031,12 +3032,12 @@ struct RankOpLowering : public ConvertOpToLLVMPattern { template struct LoadStoreOpLowering : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - using ConvertOpToLLVMPattern::isSupportedMemRefType; + using ConvertOpToLLVMPattern::isConvertibleAndHasIdentityMaps; using Base = LoadStoreOpLowering; LogicalResult match(Derived op) const override { MemRefType type = op.getMemRefType(); - return isSupportedMemRefType(type) ? success() : failure(); + return isConvertibleAndHasIdentityMaps(type) ? success() : failure(); } }; From 25a02c3d1a688d3cd18faef96c75fa553efbbac7 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 23 Dec 2020 12:31:52 +0100 Subject: [PATCH 165/378] Revert "PR24076, PR33655, C++ CWG 1558: Consider the instantiation-dependence of" This reverts commit d3bf0bb18952d830fe6df6f791a64552b271000b. This causes compilation in certain cases to fail. Reproducer TBD. --- clang/include/clang/AST/Type.h | 4 +- clang/lib/AST/ItaniumMangle.cpp | 4 - clang/test/CXX/drs/dr15xx.cpp | 14 ---- clang/test/CodeGenCXX/mangle-template.cpp | 20 ----- .../SemaTemplate/instantiation-dependence.cpp | 74 ------------------- .../SemaTemplate/partial-spec-instantiate.cpp | 18 +---- clang/www/cxx_dr_status.html | 2 +- 7 files changed, 4 insertions(+), 132 deletions(-) delete mode 100644 clang/test/SemaTemplate/instantiation-dependence.cpp diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 21c8bf79152e5..684005c4876d2 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -5411,9 +5411,7 @@ class ElaboratedType final ElaboratedType(ElaboratedTypeKeyword Keyword, NestedNameSpecifier *NNS, QualType NamedType, QualType CanonType, TagDecl *OwnedTagDecl) : TypeWithKeyword(Keyword, Elaborated, CanonType, - NamedType->getDependence() | - (NNS ? 
toTypeDependence(NNS->getDependence()) - : TypeDependence::None)), + NamedType->getDependence()), NNS(NNS), NamedType(NamedType) { ElaboratedTypeBits.HasOwnedTagDecl = false; if (OwnedTagDecl) { diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 6c8d5687c64a8..73c8f17a5d364 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2578,10 +2578,6 @@ void CXXNameMangler::mangleType(QualType T) { if (!TST->isTypeAlias()) break; - // FIXME: We presumably shouldn't strip off ElaboratedTypes with - // instantation-dependent qualifiers. See - // https://github.com/itanium-cxx-abi/cxx-abi/issues/114. - QualType Desugared = T.getSingleStepDesugaredType(Context.getASTContext()); if (Desugared == T) diff --git a/clang/test/CXX/drs/dr15xx.cpp b/clang/test/CXX/drs/dr15xx.cpp index 8bfa29a8b6676..478a0d7d00ddf 100644 --- a/clang/test/CXX/drs/dr15xx.cpp +++ b/clang/test/CXX/drs/dr15xx.cpp @@ -239,20 +239,6 @@ namespace dr1550 { // dr1550: yes } } -namespace dr1558 { // dr1558: 12 -#if __cplusplus >= 201103L - template using first_of = T; - template first_of f(int); // expected-note {{'int' cannot be used prior to '::'}} - template void f(...) = delete; // expected-note {{deleted}} - - struct X { typedef void type; }; - void test() { - f(0); - f(0); // expected-error {{deleted}} - } -#endif -} - namespace dr1560 { // dr1560: 3.5 void f(bool b, int n) { (b ? throw 0 : n) = (b ? n : throw 0) = 0; diff --git a/clang/test/CodeGenCXX/mangle-template.cpp b/clang/test/CodeGenCXX/mangle-template.cpp index 40688de7e12e8..9b5220572c2e3 100644 --- a/clang/test/CodeGenCXX/mangle-template.cpp +++ b/clang/test/CodeGenCXX/mangle-template.cpp @@ -342,23 +342,3 @@ namespace fixed_size_parameter_pack { template void f(A::B<0, Ns...>); void g() { f<1, 2>({}); } } - -namespace type_qualifier { - template using int_t = int; - template void f(decltype(int_t() + 1)) {} - // FIXME: This mangling doesn't work: we need to mangle the - // instantiation-dependent 'int_t' operand. - // CHECK: @_ZN14type_qualifier1fIPiEEvDTplcvi_ELi1EE - template void f(int); - - // Note that this template has different constraints but would mangle the - // same: - //template void f(decltype(int_t() + 1)) {} - - struct impl { using type = void; }; - template using alias = impl; - template void g(decltype(alias::type(), 1)) {} - // FIXME: Similarly we need to mangle the `T*` in here. - // CHECK: @_ZN14type_qualifier1gIPiEEvDTcmcvv_ELi1EE - template void g(int); -} diff --git a/clang/test/SemaTemplate/instantiation-dependence.cpp b/clang/test/SemaTemplate/instantiation-dependence.cpp deleted file mode 100644 index 75eb510cb68d6..0000000000000 --- a/clang/test/SemaTemplate/instantiation-dependence.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// RUN: %clang_cc1 -std=c++2b -verify %s - -// Ensure we substitute into instantiation-dependent but non-dependent -// constructs. The poster-child for this is... 
-template using void_t = void; - -namespace PR24076 { - template T declval(); - struct s {}; - - template() + 1)>> - void foo(T) {} // expected-note {{invalid operands to binary expression}} - - void f() { - foo(s{}); // expected-error {{no matching function}} - } - - template() + 1)>> // expected-error {{invalid operands to binary expression}} - struct bar {}; - - bar bar; // expected-note {{in instantiation of}} -} - -namespace PR33655 { - struct One { using x = int; }; - struct Two { using y = int; }; - - template * = nullptr> int &func() {} - template * = nullptr> float &func() {} - - int &test1 = func(); - float &test2 = func(); - - template struct indirect_void_t_imp { using type = void; }; - template using indirect_void_t = typename indirect_void_t_imp::type; - - template void foo() { - static_assert(!__is_void(indirect_void_t)); // "ok", dependent - static_assert(!__is_void(void_t)); // expected-error {{failed}} - } -} - -namespace PR46791 { // also PR45782 - template - struct trait { - static constexpr int specialization = 0; - }; - - // FIXME: Per a strict interpretation of the C++ rules, the two void_t<...> - // types below are equivalent -- we only (effectively) do token-by-token - // comparison for *expressions* appearing within types. But all other - // implementations accept this, using rules that are unclear. - template - struct trait> { // expected-note {{previous}} FIXME-note {{matches}} - static constexpr int specialization = 1; - }; - - template - struct trait> { // expected-error {{redefinition}} FIXME-note {{matches}} - static constexpr int specialization = 2; - }; - - struct A {}; - struct B { typedef int value_type; }; - struct C { typedef int element_type; }; - struct D : B, C {}; - - static_assert(trait::specialization == 0); - static_assert(trait::specialization == 1); // FIXME expected-error {{failed}} - static_assert(trait::specialization == 2); // FIXME expected-error {{failed}} - static_assert(trait::specialization == 0); // FIXME-error {{ambiguous partial specialization}} -} diff --git a/clang/test/SemaTemplate/partial-spec-instantiate.cpp b/clang/test/SemaTemplate/partial-spec-instantiate.cpp index 3b7cee88c42ec..2fc0517ae3d3c 100644 --- a/clang/test/SemaTemplate/partial-spec-instantiate.cpp +++ b/clang/test/SemaTemplate/partial-spec-instantiate.cpp @@ -51,6 +51,8 @@ namespace rdar9169404 { X::type value; #if __cplusplus >= 201103L // expected-error@-2 {{non-type template argument evaluates to -1, which cannot be narrowed to type 'bool'}} +#else + // expected-no-diagnostics #endif } @@ -96,19 +98,3 @@ namespace rdar39524996 { takesWrapperInContainer(c); } } - -namespace InstantiationDependent { - template using ignore = void; // expected-warning 0-1{{extension}} - template struct A { - static const bool specialized = false; - }; - template struct Hide { typedef void type; }; - template struct A >::type> { - static const bool specialized = true; - }; - - struct X {}; - struct Y { typedef int type; }; - _Static_assert(!A::specialized, ""); - _Static_assert(A::specialized, ""); -} diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 57093c1cf5b0c..f2f711b550946 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -9162,7 +9162,7 @@

C++ defect report implementation status

1558 CD4 Unused arguments in alias template specializations - Clang 12 + Unknown 1559 From eb9483b21053656b885f13ccfe41bfa76eb3df45 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Wed, 23 Dec 2020 12:08:28 +0000 Subject: [PATCH 166/378] [format] Add overload to parseConfiguration that accept llvm::MemoryBufferRef This overload should be used for better diagnostics when parsing configurations. Now a failure to parse will list the filename (or ) instead of just `YAML`. Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D93633 --- clang/include/clang/Format/Format.h | 13 +++++++++++-- clang/lib/Format/Format.cpp | 16 +++++++++------- clang/test/Format/error-config.cpp | 4 ++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index fd5c0e32c5c22..208fc105d4b61 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2879,7 +2879,8 @@ struct FormatStyle { private: FormatStyleSet StyleSet; - friend std::error_code parseConfiguration(StringRef Text, FormatStyle *Style, + friend std::error_code parseConfiguration(llvm::MemoryBufferRef Config, + FormatStyle *Style, bool AllowUnknownOptions); }; @@ -2938,9 +2939,17 @@ bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, /// /// If AllowUnknownOptions is true, no errors are emitted if unknown /// format options are occured. -std::error_code parseConfiguration(StringRef Text, FormatStyle *Style, +std::error_code parseConfiguration(llvm::MemoryBufferRef Config, + FormatStyle *Style, bool AllowUnknownOptions = false); +/// Like above but accepts an unnamed buffer. +inline std::error_code parseConfiguration(StringRef Config, FormatStyle *Style, + bool AllowUnknownOptions = false) { + return parseConfiguration(llvm::MemoryBufferRef(Config, "YAML"), Style, + AllowUnknownOptions); +} + /// Gets configuration in a YAML string. std::string configurationAsText(const FormatStyle &Style); diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 23eee19b16404..55abc12c61c4c 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1327,16 +1327,17 @@ bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, return true; } -std::error_code parseConfiguration(StringRef Text, FormatStyle *Style, +std::error_code parseConfiguration(llvm::MemoryBufferRef Config, + FormatStyle *Style, bool AllowUnknownOptions) { assert(Style); FormatStyle::LanguageKind Language = Style->Language; assert(Language != FormatStyle::LK_None); - if (Text.trim().empty()) + if (Config.getBuffer().trim().empty()) return make_error_code(ParseError::Error); Style->StyleSet.Clear(); std::vector Styles; - llvm::yaml::Input Input(Text); + llvm::yaml::Input Input(Config); // DocumentListTraits> uses the context to get default // values for the fields, keys for which are missing from the configuration. // Mapping also uses the context to get the language to find the correct @@ -2864,8 +2865,9 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, if (StyleName.startswith("{")) { // Parse YAML/JSON style from the command line. 
- if (std::error_code ec = - parseConfiguration(StyleName, &Style, AllowUnknownOptions)) + if (std::error_code ec = parseConfiguration( + llvm::MemoryBufferRef(StyleName, ""), &Style, + AllowUnknownOptions)) return make_string_error("Error parsing -style: " + ec.message()); return Style; } @@ -2909,8 +2911,8 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, FS->getBufferForFile(ConfigFile.str()); if (std::error_code EC = Text.getError()) return make_string_error(EC.message()); - if (std::error_code ec = parseConfiguration( - Text.get()->getBuffer(), &Style, AllowUnknownOptions)) { + if (std::error_code ec = + parseConfiguration(*Text.get(), &Style, AllowUnknownOptions)) { if (ec == ParseError::Unsuitable) { if (!UnsuitableConfigFiles.empty()) UnsuitableConfigFiles.append(", "); diff --git a/clang/test/Format/error-config.cpp b/clang/test/Format/error-config.cpp index 7fbc869f3a3cc..9f73a9eb9507e 100644 --- a/clang/test/Format/error-config.cpp +++ b/clang/test/Format/error-config.cpp @@ -1,10 +1,10 @@ // RUN: clang-format %s --Wno-error=unknown --style="{UnknownKey: true}" 2>&1 | FileCheck %s -check-prefix=CHECK // RUN: not clang-format %s --style="{UnknownKey: true}" 2>&1 | FileCheck %s -check-prefix=CHECK-FAIL -// CHECK: YAML:1:2: warning: unknown key 'UnknownKey' +// CHECK: :1:2: warning: unknown key 'UnknownKey' // CHECK-NEXT: {UnknownKey: true} // CHECK-NEXT: ^~~~~~~~~~ -// CHECK-FAIL: YAML:1:2: error: unknown key 'UnknownKey' +// CHECK-FAIL: :1:2: error: unknown key 'UnknownKey' // CHECK-FAIL-NEXT: {UnknownKey: true} // CHECK-FAIL-NEXT: ^~~~~~~~~~ From 7ed9cfc7b19fdba9eb441ce1a8ba82cda14d76a8 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 22 Dec 2020 11:22:56 +0100 Subject: [PATCH 167/378] [mlir] Remove static constructors from LLVMType LLVMType contains numerous static constructors that were initially introduced for API compatibility with LLVM. Most of these merely forward to arguments to `SpecificType::get` (MLIR defines classes for all types, unlike LLVM IR), while some introduce subtle semantics differences due to different modeling of MLIR types (e.g., structs are not auto-renamed in case of conflicts). Furthermore, these constructors don't match MLIR idioms and actively prevent us from making the LLVM dialect type system more open. Remove them and use `SpecificType::get` instead. 
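As a rough before/after sketch of the mechanical rewrite this entails (assuming an `MLIRContext *ctx` is in scope; the exact call sites are in the diffs below):

  // Previously, via the convenience getters on the LLVMType base class:
  //   LLVM::LLVMType i8PtrTy = LLVM::LLVMType::getInt8PtrTy(ctx);
  //   LLVM::LLVMType fnTy = LLVM::LLVMType::getFunctionTy(
  //       LLVM::LLVMType::getVoidTy(ctx), {i8PtrTy}, /*isVarArg=*/false);
  // Now, by naming the specific type classes directly:
  LLVM::LLVMType i8PtrTy =
      LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
  LLVM::LLVMType fnTy =
      LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(ctx), {i8PtrTy});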
Depends On D93680 Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D93681 --- mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp | 21 +-- mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp | 21 +-- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 4 +- mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h | 109 ++-------------- .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 54 ++++---- .../ConvertLaunchFuncToRuntimeCalls.cpp | 27 ++-- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 6 +- .../GPUCommon/IndexIntrinsicsOpLowering.h | 13 +- .../GPUCommon/OpToFuncCallLowering.h | 5 +- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 8 +- .../ConvertLaunchFuncToVulkanCalls.cpp | 61 +++++---- .../Conversion/LinalgToLLVM/LinalgToLLVM.cpp | 2 +- .../ConvertLaunchFuncToLLVMCalls.cpp | 7 +- .../SPIRVToLLVM/ConvertSPIRVToLLVM.cpp | 20 +-- .../StandardToLLVM/StandardToLLVM.cpp | 91 ++++++------- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 26 ++-- .../VectorToROCDL/VectorToROCDL.cpp | 4 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 45 +++---- mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 121 ++++-------------- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 17 +-- mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp | 12 +- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 5 +- .../test/lib/Transforms/TestConvertCallOp.cpp | 3 +- 23 files changed, 267 insertions(+), 415 deletions(-) diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp index a04b3ecd4daee..6fbf29f4128d7 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -111,10 +111,11 @@ class PrintOpLowering : public ConversionPattern { // Create a function declaration for printf, the signature is: // * `i32 (i8*, ...)` - auto llvmI32Ty = LLVM::LLVMType::getInt32Ty(context); - auto llvmI8PtrTy = LLVM::LLVMType::getInt8PtrTy(context); - auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmI32Ty, llvmI8PtrTy, - /*isVarArg=*/true); + auto llvmI32Ty = LLVM::LLVMIntegerType::get(context, 32); + auto llvmI8PtrTy = + LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(context, 8)); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, + /*isVarArg=*/true); // Insert the printf function into the body of the parent module. PatternRewriter::InsertionGuard insertGuard(rewriter); @@ -133,8 +134,8 @@ class PrintOpLowering : public ConversionPattern { if (!(global = module.lookupSymbol(name))) { OpBuilder::InsertionGuard insertGuard(builder); builder.setInsertionPointToStart(module.getBody()); - auto type = LLVM::LLVMType::getArrayTy( - LLVM::LLVMType::getInt8Ty(builder.getContext()), value.size()); + auto type = LLVM::LLVMArrayType::get( + LLVM::LLVMIntegerType::get(builder.getContext(), 8), value.size()); global = builder.create(loc, type, /*isConstant=*/true, LLVM::Linkage::Internal, name, builder.getStringAttr(value)); @@ -143,11 +144,13 @@ class PrintOpLowering : public ConversionPattern { // Get the pointer to the first character in the global string. 
Value globalPtr = builder.create(loc, global); Value cst0 = builder.create( - loc, LLVM::LLVMType::getInt64Ty(builder.getContext()), + loc, LLVM::LLVMIntegerType::get(builder.getContext(), 64), builder.getIntegerAttr(builder.getIndexType(), 0)); return builder.create( - loc, LLVM::LLVMType::getInt8PtrTy(builder.getContext()), globalPtr, - ArrayRef({cst0, cst0})); + loc, + LLVM::LLVMPointerType::get( + LLVM::LLVMIntegerType::get(builder.getContext(), 8)), + globalPtr, ArrayRef({cst0, cst0})); } }; } // end anonymous namespace diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp index a04b3ecd4daee..6fbf29f4128d7 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp @@ -111,10 +111,11 @@ class PrintOpLowering : public ConversionPattern { // Create a function declaration for printf, the signature is: // * `i32 (i8*, ...)` - auto llvmI32Ty = LLVM::LLVMType::getInt32Ty(context); - auto llvmI8PtrTy = LLVM::LLVMType::getInt8PtrTy(context); - auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmI32Ty, llvmI8PtrTy, - /*isVarArg=*/true); + auto llvmI32Ty = LLVM::LLVMIntegerType::get(context, 32); + auto llvmI8PtrTy = + LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(context, 8)); + auto llvmFnType = LLVM::LLVMFunctionType::get(llvmI32Ty, llvmI8PtrTy, + /*isVarArg=*/true); // Insert the printf function into the body of the parent module. PatternRewriter::InsertionGuard insertGuard(rewriter); @@ -133,8 +134,8 @@ class PrintOpLowering : public ConversionPattern { if (!(global = module.lookupSymbol(name))) { OpBuilder::InsertionGuard insertGuard(builder); builder.setInsertionPointToStart(module.getBody()); - auto type = LLVM::LLVMType::getArrayTy( - LLVM::LLVMType::getInt8Ty(builder.getContext()), value.size()); + auto type = LLVM::LLVMArrayType::get( + LLVM::LLVMIntegerType::get(builder.getContext(), 8), value.size()); global = builder.create(loc, type, /*isConstant=*/true, LLVM::Linkage::Internal, name, builder.getStringAttr(value)); @@ -143,11 +144,13 @@ class PrintOpLowering : public ConversionPattern { // Get the pointer to the first character in the global string. 
Value globalPtr = builder.create(loc, global); Value cst0 = builder.create( - loc, LLVM::LLVMType::getInt64Ty(builder.getContext()), + loc, LLVM::LLVMIntegerType::get(builder.getContext(), 64), builder.getIntegerAttr(builder.getIndexType(), 0)); return builder.create( - loc, LLVM::LLVMType::getInt8PtrTy(builder.getContext()), globalPtr, - ArrayRef({cst0, cst0})); + loc, + LLVM::LLVMPointerType::get( + LLVM::LLVMIntegerType::get(builder.getContext(), 8)), + globalPtr, ArrayRef({cst0, cst0})); } }; } // end anonymous namespace diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 552fe15e68997..4968b33f47a47 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -150,7 +150,7 @@ def LLVM_ICmpOp : LLVM_Op<"icmp", [NoSideEffect]> { let builders = [ OpBuilderDAG<(ins "ICmpPredicate":$predicate, "Value":$lhs, "Value":$rhs), [{ - build($_builder, $_state, LLVMType::getInt1Ty(lhs.getType().getContext()), + build($_builder, $_state, LLVMIntegerType::get(lhs.getType().getContext(), 1), $_builder.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); }]>]; let parser = [{ return parseCmpOp(parser, result); }]; @@ -198,7 +198,7 @@ def LLVM_FCmpOp : LLVM_Op<"fcmp", [NoSideEffect]> { let builders = [ OpBuilderDAG<(ins "FCmpPredicate":$predicate, "Value":$lhs, "Value":$rhs), [{ - build($_builder, $_state, LLVMType::getInt1Ty(lhs.getType().getContext()), + build($_builder, $_state, LLVMIntegerType::get(lhs.getType().getContext(), 1), $_builder.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); }]>]; let parser = [{ return parseCmpOp(parser, result); }]; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index e1938c12c809e..7c7731946ba80 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -53,9 +53,7 @@ class LLVMIntegerType; /// /// The LLVM dialect in MLIR fully reflects the LLVM IR type system, prodiving a /// separate MLIR type for each LLVM IR type. All types are represented as -/// separate subclasses and are compatible with the isa/cast infrastructure. For -/// convenience, the base class provides most of the APIs available on -/// llvm::Type in addition to MLIR-compatible APIs. +/// separate subclasses and are compatible with the isa/cast infrastructure. /// /// The LLVM dialect type system is closed: parametric types can only refer to /// other LLVM dialect types. This is consistent with LLVM IR and enables a more @@ -64,6 +62,11 @@ class LLVMIntegerType; /// Similarly to other MLIR types, LLVM dialect types are owned by the MLIR /// context, have an immutable identifier (for most types except identified /// structs, the entire type is the identifier) and are thread-safe. +/// +/// This class is a thin common base class for different types available in the +/// LLVM dialect. It intentionally does not provide the API similar to +/// llvm::Type to avoid confusion and highlight potentially expensive operations +/// (e.g., type creation in MLIR takes a lock, so it's better to cache types). class LLVMType : public Type { public: /// Inherit base constructors. @@ -79,98 +82,6 @@ class LLVMType : public Type { static bool classof(Type type); LLVMDialect &getDialect(); - - /// Utilities used to generate floating point types. 
- static LLVMType getDoubleTy(MLIRContext *context); - static LLVMType getFloatTy(MLIRContext *context); - static LLVMType getBFloatTy(MLIRContext *context); - static LLVMType getHalfTy(MLIRContext *context); - static LLVMType getFP128Ty(MLIRContext *context); - static LLVMType getX86_FP80Ty(MLIRContext *context); - - /// Utilities used to generate integer types. - static LLVMType getIntNTy(MLIRContext *context, unsigned numBits); - static LLVMType getInt1Ty(MLIRContext *context) { - return getIntNTy(context, /*numBits=*/1); - } - static LLVMType getInt8Ty(MLIRContext *context) { - return getIntNTy(context, /*numBits=*/8); - } - static LLVMType getInt8PtrTy(MLIRContext *context); - static LLVMType getInt16Ty(MLIRContext *context) { - return getIntNTy(context, /*numBits=*/16); - } - static LLVMType getInt32Ty(MLIRContext *context) { - return getIntNTy(context, /*numBits=*/32); - } - static LLVMType getInt64Ty(MLIRContext *context) { - return getIntNTy(context, /*numBits=*/64); - } - - /// Utilities used to generate other miscellaneous types. - static LLVMType getArrayTy(LLVMType elementType, uint64_t numElements); - static LLVMType getFunctionTy(LLVMType result, ArrayRef params, - bool isVarArg); - static LLVMType getFunctionTy(LLVMType result, bool isVarArg) { - return getFunctionTy(result, llvm::None, isVarArg); - } - static LLVMType getStructTy(MLIRContext *context, ArrayRef elements, - bool isPacked = false); - static LLVMType getStructTy(MLIRContext *context, bool isPacked = false) { - return getStructTy(context, llvm::None, isPacked); - } - template - static typename std::enable_if::value, - LLVMType>::type - getStructTy(LLVMType elt1, Args... elts) { - SmallVector fields({elt1, elts...}); - return getStructTy(elt1.getContext(), fields); - } - static LLVMType getVectorTy(LLVMType elementType, unsigned numElements); - - /// Void type utilities. - static LLVMType getVoidTy(MLIRContext *context); - - // Creation and setting of LLVM's identified struct types - static LLVMType createStructTy(MLIRContext *context, - ArrayRef elements, - Optional name, - bool isPacked = false); - - static LLVMType createStructTy(MLIRContext *context, - Optional name) { - return createStructTy(context, llvm::None, name); - } - - static LLVMType createStructTy(ArrayRef elements, - Optional name, - bool isPacked = false) { - assert(!elements.empty() && - "This method may not be invoked with an empty list"); - LLVMType ele0 = elements.front(); - return createStructTy(ele0.getContext(), elements, name, isPacked); - } - - template - static typename std::enable_if_t::value, - LLVMType> - createStructTy(StringRef name, LLVMType elt1, Args... elts) { - SmallVector fields({elt1, elts...}); - Optional opt_name(name); - return createStructTy(elt1.getContext(), fields, opt_name); - } - - static LLVMType setStructTyBody(LLVMType structType, - ArrayRef elements, - bool isPacked = false); - - template - static typename std::enable_if_t::value, - LLVMType> - setStructTyBody(LLVMType structType, LLVMType elt1, Args... elts) { - SmallVector fields({elt1, elts...}); - return setStructTyBody(structType, fields); - } }; //===----------------------------------------------------------------------===// @@ -386,6 +297,14 @@ class LLVMStructType : public Type::TypeBase elements, + bool isPacked = false); + /// Gets or creates a literal struct with the given body in the provided /// context. 
static LLVMStructType getLiteral(MLIRContext *context, diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 2415924557db7..3daa70b0a9528 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -52,7 +52,7 @@ namespace { // Async Runtime API function types. struct AsyncAPI { static FunctionType addOrDropRefFunctionType(MLIRContext *ctx) { - auto ref = LLVM::LLVMType::getInt8PtrTy(ctx); + auto ref = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); auto count = IntegerType::get(ctx, 32); return FunctionType::get(ctx, {ref, count}, {}); } @@ -78,7 +78,7 @@ struct AsyncAPI { } static FunctionType executeFunctionType(MLIRContext *ctx) { - auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); + auto hdl = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {hdl, resume}, {}); } @@ -90,22 +90,22 @@ struct AsyncAPI { } static FunctionType awaitAndExecuteFunctionType(MLIRContext *ctx) { - auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); + auto hdl = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {TokenType::get(ctx), hdl, resume}, {}); } static FunctionType awaitAllAndExecuteFunctionType(MLIRContext *ctx) { - auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); + auto hdl = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {GroupType::get(ctx), hdl, resume}, {}); } // Auxiliary coroutine resume intrinsic wrapper. 
static LLVM::LLVMType resumeFunctionType(MLIRContext *ctx) { - auto voidTy = LLVM::LLVMType::getVoidTy(ctx); - auto i8Ptr = LLVM::LLVMType::getInt8PtrTy(ctx); - return LLVM::LLVMType::getFunctionTy(voidTy, {i8Ptr}, false); + auto voidTy = LLVM::LLVMVoidType::get(ctx); + auto i8Ptr = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); + return LLVM::LLVMFunctionType::get(voidTy, {i8Ptr}, false); } }; } // namespace @@ -155,7 +155,7 @@ static void addLLVMFuncDecl(ModuleOp module, ImplicitLocOpBuilder &builder, ArrayRef params) { if (module.lookupSymbol(name)) return; - LLVM::LLVMType type = LLVM::LLVMType::getFunctionTy(ret, params, false); + LLVM::LLVMType type = LLVM::LLVMFunctionType::get(ret, params); builder.create(name, type); } @@ -168,13 +168,13 @@ static void addCoroutineIntrinsicsDeclarations(ModuleOp module) { module.getBody()->getTerminator()); auto token = LLVMTokenType::get(ctx); - auto voidTy = LLVMType::getVoidTy(ctx); + auto voidTy = LLVMVoidType::get(ctx); - auto i8 = LLVMType::getInt8Ty(ctx); - auto i1 = LLVMType::getInt1Ty(ctx); - auto i32 = LLVMType::getInt32Ty(ctx); - auto i64 = LLVMType::getInt64Ty(ctx); - auto i8Ptr = LLVMType::getInt8PtrTy(ctx); + auto i8 = LLVMIntegerType::get(ctx, 8); + auto i1 = LLVMIntegerType::get(ctx, 1); + auto i32 = LLVMIntegerType::get(ctx, 32); + auto i64 = LLVMIntegerType::get(ctx, 64); + auto i8Ptr = LLVMPointerType::get(i8); addLLVMFuncDecl(module, builder, kCoroId, token, {i32, i8Ptr, i8Ptr, i8Ptr}); addLLVMFuncDecl(module, builder, kCoroSizeI64, i64, {}); @@ -201,9 +201,9 @@ static void addCRuntimeDeclarations(ModuleOp module) { ImplicitLocOpBuilder builder(module.getLoc(), module.getBody()->getTerminator()); - auto voidTy = LLVMType::getVoidTy(ctx); - auto i64 = LLVMType::getInt64Ty(ctx); - auto i8Ptr = LLVMType::getInt8PtrTy(ctx); + auto voidTy = LLVMVoidType::get(ctx); + auto i64 = LLVMIntegerType::get(ctx, 64); + auto i8Ptr = LLVMPointerType::get(LLVMIntegerType::get(ctx, 8)); addLLVMFuncDecl(module, builder, kMalloc, i8Ptr, {i64}); addLLVMFuncDecl(module, builder, kFree, voidTy, {i8Ptr}); @@ -227,11 +227,11 @@ static void addResumeFunction(ModuleOp module) { if (module.lookupSymbol(kResume)) return; - auto voidTy = LLVM::LLVMType::getVoidTy(ctx); - auto i8Ptr = LLVM::LLVMType::getInt8PtrTy(ctx); + auto voidTy = LLVM::LLVMVoidType::get(ctx); + auto i8Ptr = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); auto resumeOp = moduleBuilder.create( - loc, kResume, LLVM::LLVMType::getFunctionTy(voidTy, {i8Ptr}, false)); + loc, kResume, LLVM::LLVMFunctionType::get(voidTy, {i8Ptr})); resumeOp.setPrivate(); auto *block = resumeOp.addEntryBlock(); @@ -297,10 +297,10 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { MLIRContext *ctx = func.getContext(); auto token = LLVM::LLVMTokenType::get(ctx); - auto i1 = LLVM::LLVMType::getInt1Ty(ctx); - auto i32 = LLVM::LLVMType::getInt32Ty(ctx); - auto i64 = LLVM::LLVMType::getInt64Ty(ctx); - auto i8Ptr = LLVM::LLVMType::getInt8PtrTy(ctx); + auto i1 = LLVM::LLVMIntegerType::get(ctx, 1); + auto i32 = LLVM::LLVMIntegerType::get(ctx, 32); + auto i64 = LLVM::LLVMIntegerType::get(ctx, 64); + auto i8Ptr = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); Block *entryBlock = func.addEntryBlock(); Location loc = func.getBody().getLoc(); @@ -421,8 +421,8 @@ static void addSuspensionPoint(CoroMachinery coro, Value coroState, OpBuilder &builder) { Location loc = op->getLoc(); MLIRContext *ctx = op->getContext(); - auto i1 = LLVM::LLVMType::getInt1Ty(ctx); - auto i8 = 
LLVM::LLVMType::getInt8Ty(ctx); + auto i1 = LLVM::LLVMIntegerType::get(ctx, 1); + auto i8 = LLVM::LLVMIntegerType::get(ctx, 8); // Add a coroutine suspension in place of original `op` in the split block. OpBuilder::InsertionGuard guard(builder); @@ -568,7 +568,7 @@ class AsyncRuntimeTypeConverter : public TypeConverter { MLIRContext *ctx = type.getContext(); // Convert async tokens and groups to opaque pointers. if (type.isa()) - return LLVM::LLVMType::getInt8PtrTy(ctx); + return LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8)); return type; } }; diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index d35aa0346f743..6859834de67f5 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -55,8 +55,7 @@ class FunctionCallBuilder { FunctionCallBuilder(StringRef functionName, LLVM::LLVMType returnType, ArrayRef argumentTypes) : functionName(functionName), - functionType(LLVM::LLVMFunctionType::get(returnType, argumentTypes, - /*isVarArg=*/false)) {} + functionType(LLVM::LLVMFunctionType::get(returnType, argumentTypes)) {} LLVM::CallOp create(Location loc, OpBuilder &builder, ArrayRef arguments) const; @@ -74,14 +73,15 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { protected: MLIRContext *context = &this->getTypeConverter()->getContext(); - LLVM::LLVMType llvmVoidType = LLVM::LLVMType::getVoidTy(context); - LLVM::LLVMType llvmPointerType = LLVM::LLVMType::getInt8PtrTy(context); + LLVM::LLVMType llvmVoidType = LLVM::LLVMVoidType::get(context); + LLVM::LLVMType llvmPointerType = + LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(context, 8)); LLVM::LLVMType llvmPointerPointerType = LLVM::LLVMPointerType::get(llvmPointerType); - LLVM::LLVMType llvmInt8Type = LLVM::LLVMType::getInt8Ty(context); - LLVM::LLVMType llvmInt32Type = LLVM::LLVMType::getInt32Ty(context); - LLVM::LLVMType llvmInt64Type = LLVM::LLVMType::getInt64Ty(context); - LLVM::LLVMType llvmIntPtrType = LLVM::LLVMType::getIntNTy( + LLVM::LLVMType llvmInt8Type = LLVM::LLVMIntegerType::get(context, 8); + LLVM::LLVMType llvmInt32Type = LLVM::LLVMIntegerType::get(context, 32); + LLVM::LLVMType llvmInt64Type = LLVM::LLVMIntegerType::get(context, 64); + LLVM::LLVMType llvmIntPtrType = LLVM::LLVMIntegerType::get( context, this->getTypeConverter()->getPointerBitwidth(0)); FunctionCallBuilder moduleLoadCallBuilder = { @@ -515,7 +515,8 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( argumentTypes.reserve(numArguments); for (auto argument : arguments) argumentTypes.push_back(argument.getType().cast()); - auto structType = LLVM::LLVMType::createStructTy(argumentTypes, StringRef()); + auto structType = LLVM::LLVMStructType::getNewIdentified(context, StringRef(), + argumentTypes); auto one = builder.create(loc, llvmInt32Type, builder.getI32IntegerAttr(1)); auto structPtr = builder.create( @@ -716,10 +717,10 @@ mlir::createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation) { void mlir::populateGpuToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, StringRef gpuBinaryAnnotation) { - converter.addConversion( - [context = &converter.getContext()](gpu::AsyncTokenType type) -> Type { - return LLVM::LLVMType::getInt8PtrTy(context); - }); + converter.addConversion([context = &converter.getContext()]( + gpu::AsyncTokenType type) -> Type { + return 
LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(context, 8)); + }); patterns.insert { auto elementType = typeConverter->convertType(type.getElementType()) .template cast(); - auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements); + auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements); std::string name = std::string( llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index())); auto globalOp = rewriter.create( @@ -85,7 +85,7 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Rewrite workgroup memory attributions to addresses of global buffers. rewriter.setInsertionPointToStart(&gpuFuncOp.front()); unsigned numProperArguments = gpuFuncOp.getNumArguments(); - auto i32Type = LLVM::LLVMType::getInt32Ty(rewriter.getContext()); + auto i32Type = LLVM::LLVMIntegerType::get(rewriter.getContext(), 32); Value zero = nullptr; if (!workgroupBuffers.empty()) @@ -114,7 +114,7 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Rewrite private memory attributions to alloca'ed buffers. unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions(); - auto int64Ty = LLVM::LLVMType::getInt64Ty(rewriter.getContext()); + auto int64Ty = LLVM::LLVMIntegerType::get(rewriter.getContext(), 64); for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) { Value attribution = en.value(); auto type = attribution.getType().cast(); diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h index a51dff51cac4b..0a1e76b99dbe2 100644 --- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h +++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h @@ -48,13 +48,16 @@ struct GPUIndexIntrinsicOpLowering : public ConvertOpToLLVMPattern { Value newOp; switch (dimensionToIndex(op)) { case X: - newOp = rewriter.create(loc, LLVM::LLVMType::getInt32Ty(context)); + newOp = + rewriter.create(loc, LLVM::LLVMIntegerType::get(context, 32)); break; case Y: - newOp = rewriter.create(loc, LLVM::LLVMType::getInt32Ty(context)); + newOp = + rewriter.create(loc, LLVM::LLVMIntegerType::get(context, 32)); break; case Z: - newOp = rewriter.create(loc, LLVM::LLVMType::getInt32Ty(context)); + newOp = + rewriter.create(loc, LLVM::LLVMIntegerType::get(context, 32)); break; default: return failure(); @@ -62,10 +65,10 @@ struct GPUIndexIntrinsicOpLowering : public ConvertOpToLLVMPattern { if (indexBitwidth > 32) { newOp = rewriter.create( - loc, LLVM::LLVMType::getIntNTy(context, indexBitwidth), newOp); + loc, LLVM::LLVMIntegerType::get(context, indexBitwidth), newOp); } else if (indexBitwidth < 32) { newOp = rewriter.create( - loc, LLVM::LLVMType::getIntNTy(context, indexBitwidth), newOp); + loc, LLVM::LLVMIntegerType::get(context, indexBitwidth), newOp); } rewriter.replaceOp(op, {newOp}); diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h index b2887aa1d7829..631eca5cd32db 100644 --- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h +++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h @@ -85,7 +85,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { return operand; return rewriter.create( - operand.getLoc(), LLVM::LLVMType::getFloatTy(rewriter.getContext()), + operand.getLoc(), LLVM::LLVMFloatType::get(rewriter.getContext()), operand); } @@ -96,8 +96,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { for (Value operand : operands) { operandTypes.push_back(operand.getType().cast()); } - 
return LLVMType::getFunctionTy(resultType, operandTypes, - /*isVarArg=*/false); + return LLVM::LLVMFunctionType::get(resultType, operandTypes); } StringRef getFunctionName(LLVM::LLVMType type) const { diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index cea1cdc7e25fc..f747f519c66b6 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -57,10 +57,10 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern { gpu::ShuffleOpAdaptor adaptor(operands); auto valueTy = adaptor.value().getType().cast(); - auto int32Type = LLVM::LLVMType::getInt32Ty(rewriter.getContext()); - auto predTy = LLVM::LLVMType::getInt1Ty(rewriter.getContext()); - auto resultTy = - LLVM::LLVMType::getStructTy(rewriter.getContext(), {valueTy, predTy}); + auto int32Type = LLVM::LLVMIntegerType::get(rewriter.getContext(), 32); + auto predTy = LLVM::LLVMIntegerType::get(rewriter.getContext(), 1); + auto resultTy = LLVM::LLVMStructType::getLiteral(rewriter.getContext(), + {valueTy, predTy}); Value one = rewriter.create( loc, int32Type, rewriter.getI32IntegerAttr(1)); diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp index c676cd256d66a..4b657d25f51e3 100644 --- a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp +++ b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp @@ -57,11 +57,12 @@ class VulkanLaunchFuncToVulkanCallsPass VulkanLaunchFuncToVulkanCallsPass> { private: void initializeCachedTypes() { - llvmFloatType = LLVM::LLVMType::getFloatTy(&getContext()); - llvmVoidType = LLVM::LLVMType::getVoidTy(&getContext()); - llvmPointerType = LLVM::LLVMType::getInt8PtrTy(&getContext()); - llvmInt32Type = LLVM::LLVMType::getInt32Ty(&getContext()); - llvmInt64Type = LLVM::LLVMType::getInt64Ty(&getContext()); + llvmFloatType = LLVM::LLVMFloatType::get(&getContext()); + llvmVoidType = LLVM::LLVMVoidType::get(&getContext()); + llvmPointerType = LLVM::LLVMPointerType::get( + LLVM::LLVMIntegerType::get(&getContext(), 8)); + llvmInt32Type = LLVM::LLVMIntegerType::get(&getContext(), 32); + llvmInt64Type = LLVM::LLVMIntegerType::get(&getContext(), 64); } LLVM::LLVMType getMemRefType(uint32_t rank, LLVM::LLVMType elemenType) { @@ -77,12 +78,12 @@ class VulkanLaunchFuncToVulkanCallsPass // }; auto llvmPtrToElementType = LLVM::LLVMPointerType::get(elemenType); auto llvmArrayRankElementSizeType = - LLVM::LLVMType::getArrayTy(getInt64Type(), rank); + LLVM::LLVMArrayType::get(getInt64Type(), rank); // Create a type // `!llvm<"{ `element-type`*, `element-type`*, i64, // [`rank` x i64], [`rank` x i64]}">`. - return LLVM::LLVMType::getStructTy( + return LLVM::LLVMStructType::getLiteral( &getContext(), {llvmPtrToElementType, llvmPtrToElementType, getInt64Type(), llvmArrayRankElementSizeType, llvmArrayRankElementSizeType}); @@ -242,7 +243,7 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( // int16_t and bitcast the descriptor. 
if (type.isa()) { auto memRefTy = - getMemRefType(rank, LLVM::LLVMType::getInt16Ty(&getContext())); + getMemRefType(rank, LLVM::LLVMIntegerType::get(&getContext(), 16)); ptrToMemRefDescriptor = builder.create( loc, LLVM::LLVMPointerType::get(memRefTy), ptrToMemRefDescriptor); } @@ -296,47 +297,46 @@ void VulkanLaunchFuncToVulkanCallsPass::declareVulkanFunctions(Location loc) { if (!module.lookupSymbol(kSetEntryPoint)) { builder.create( loc, kSetEntryPoint, - LLVM::LLVMType::getFunctionTy(getVoidType(), - {getPointerType(), getPointerType()}, - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(getVoidType(), + {getPointerType(), getPointerType()})); } if (!module.lookupSymbol(kSetNumWorkGroups)) { builder.create( loc, kSetNumWorkGroups, - LLVM::LLVMType::getFunctionTy( - getVoidType(), - {getPointerType(), getInt64Type(), getInt64Type(), getInt64Type()}, - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(getVoidType(), + {getPointerType(), getInt64Type(), + getInt64Type(), getInt64Type()})); } if (!module.lookupSymbol(kSetBinaryShader)) { builder.create( loc, kSetBinaryShader, - LLVM::LLVMType::getFunctionTy( - getVoidType(), {getPointerType(), getPointerType(), getInt32Type()}, - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get( + getVoidType(), + {getPointerType(), getPointerType(), getInt32Type()})); } if (!module.lookupSymbol(kRunOnVulkan)) { builder.create( loc, kRunOnVulkan, - LLVM::LLVMType::getFunctionTy(getVoidType(), {getPointerType()}, - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(getVoidType(), {getPointerType()})); } for (unsigned i = 1; i <= 3; i++) { - for (LLVM::LLVMType type : {LLVM::LLVMType::getFloatTy(&getContext()), - LLVM::LLVMType::getInt32Ty(&getContext()), - LLVM::LLVMType::getInt16Ty(&getContext()), - LLVM::LLVMType::getInt8Ty(&getContext()), - LLVM::LLVMType::getHalfTy(&getContext())}) { + SmallVector types{ + LLVM::LLVMFloatType::get(&getContext()), + LLVM::LLVMIntegerType::get(&getContext(), 32), + LLVM::LLVMIntegerType::get(&getContext(), 16), + LLVM::LLVMIntegerType::get(&getContext(), 8), + LLVM::LLVMHalfType::get(&getContext())}; + for (auto type : types) { std::string fnName = "bindMemRef" + std::to_string(i) + "D" + std::string(stringifyType(type)); if (type.isa()) - type = LLVM::LLVMType::getInt16Ty(&getContext()); + type = LLVM::LLVMIntegerType::get(&getContext(), 16); if (!module.lookupSymbol(fnName)) { - auto fnType = LLVM::LLVMType::getFunctionTy( + auto fnType = LLVM::LLVMFunctionType::get( getVoidType(), {getPointerType(), getInt32Type(), getInt32Type(), LLVM::LLVMPointerType::get(getMemRefType(i, type))}, @@ -348,16 +348,13 @@ void VulkanLaunchFuncToVulkanCallsPass::declareVulkanFunctions(Location loc) { if (!module.lookupSymbol(kInitVulkan)) { builder.create( - loc, kInitVulkan, - LLVM::LLVMType::getFunctionTy(getPointerType(), {}, - /*isVarArg=*/false)); + loc, kInitVulkan, LLVM::LLVMFunctionType::get(getPointerType(), {})); } if (!module.lookupSymbol(kDeinitVulkan)) { builder.create( loc, kDeinitVulkan, - LLVM::LLVMType::getFunctionTy(getVoidType(), {getPointerType()}, - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(getVoidType(), {getPointerType()})); } } diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp index 5546c82a9e699..c86ae710aac63 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp +++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp @@ -86,7 +86,7 @@ static Type convertRangeType(RangeType t, LLVMTypeConverter &converter) { auto *context 
= t.getContext(); auto int64Ty = converter.convertType(IntegerType::get(context, 64)) .cast(); - return LLVMType::getStructTy(int64Ty, int64Ty, int64Ty); + return LLVMStructType::getLiteral(context, {int64Ty, int64Ty, int64Ty}); } namespace { diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp index 1724c7044339d..3b4a8d66001d1 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp @@ -60,7 +60,7 @@ static unsigned calculateGlobalIndex(spirv::GlobalVariableOp op) { static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder) { MLIRContext *context = builder.getContext(); - auto llvmI1Type = LLVM::LLVMType::getInt1Ty(context); + auto llvmI1Type = LLVM::LLVMIntegerType::get(context, 1); Value isVolatile = builder.create( loc, llvmI1Type, builder.getBoolAttr(false)); builder.create(loc, dst, src, size, isVolatile); @@ -183,9 +183,8 @@ class GPULaunchLowering : public ConvertOpToLLVMPattern { rewriter.setInsertionPointToStart(module.getBody()); kernelFunc = rewriter.create( rewriter.getUnknownLoc(), newKernelFuncName, - LLVM::LLVMType::getFunctionTy(LLVM::LLVMType::getVoidTy(context), - ArrayRef(), - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(context), + ArrayRef())); rewriter.setInsertionPoint(launchOp); } diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp index 7da9c47f92199..2633f4bdfe6fa 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp @@ -195,8 +195,8 @@ convertStructTypeWithOffset(spirv::StructType type, llvm::map_range(type.getElementTypes(), [&](Type elementType) { return converter.convertType(elementType).cast(); })); - return LLVM::LLVMType::getStructTy(type.getContext(), elementsVector, - /*isPacked=*/false); + return LLVM::LLVMStructType::getLiteral(type.getContext(), elementsVector, + /*isPacked=*/false); } /// Converts SPIR-V struct with no offset to packed LLVM struct. @@ -206,15 +206,15 @@ static Type convertStructTypePacked(spirv::StructType type, llvm::map_range(type.getElementTypes(), [&](Type elementType) { return converter.convertType(elementType).cast(); })); - return LLVM::LLVMType::getStructTy(type.getContext(), elementsVector, - /*isPacked=*/true); + return LLVM::LLVMStructType::getLiteral(type.getContext(), elementsVector, + /*isPacked=*/true); } /// Creates LLVM dialect constant with the given value. static Value createI32ConstantOf(Location loc, PatternRewriter &rewriter, unsigned value) { return rewriter.create( - loc, LLVM::LLVMType::getInt32Ty(rewriter.getContext()), + loc, LLVM::LLVMIntegerType::get(rewriter.getContext(), 32), rewriter.getIntegerAttr(rewriter.getI32Type(), value)); } @@ -258,7 +258,7 @@ static Optional convertArrayType(spirv::ArrayType type, auto llvmElementType = converter.convertType(elementType).cast(); unsigned numElements = type.getNumElements(); - return LLVM::LLVMType::getArrayTy(llvmElementType, numElements); + return LLVM::LLVMArrayType::get(llvmElementType, numElements); } /// Converts SPIR-V pointer type to LLVM pointer. 
Pointer's storage class is not @@ -279,7 +279,7 @@ static Optional convertRuntimeArrayType(spirv::RuntimeArrayType type, return llvm::None; auto elementType = converter.convertType(type.getElementType()).cast(); - return LLVM::LLVMType::getArrayTy(elementType, 0); + return LLVM::LLVMArrayType::get(elementType, 0); } /// Converts SPIR-V struct to LLVM struct. There is no support of structs with @@ -666,15 +666,15 @@ class ExecutionModePattern // int32_t executionMode; // int32_t values[]; // optional values // }; - auto llvmI32Type = LLVM::LLVMType::getInt32Ty(context); + auto llvmI32Type = LLVM::LLVMIntegerType::get(context, 32); SmallVector fields; fields.push_back(llvmI32Type); ArrayAttr values = op.values(); if (!values.empty()) { - auto arrayType = LLVM::LLVMType::getArrayTy(llvmI32Type, values.size()); + auto arrayType = LLVM::LLVMArrayType::get(llvmI32Type, values.size()); fields.push_back(arrayType); } - auto structType = LLVM::LLVMType::getStructTy(context, fields); + auto structType = LLVM::LLVMStructType::getLiteral(context, fields); // Create `llvm.mlir.global` with initializer region containing one block. auto global = rewriter.create( diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index f4d1df81565bc..233c2eadc77c1 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -171,7 +171,7 @@ MLIRContext &LLVMTypeConverter::getContext() { } LLVM::LLVMType LLVMTypeConverter::getIndexType() { - return LLVM::LLVMType::getIntNTy(&getContext(), getIndexTypeBitwidth()); + return LLVM::LLVMIntegerType::get(&getContext(), getIndexTypeBitwidth()); } unsigned LLVMTypeConverter::getPointerBitwidth(unsigned addressSpace) { @@ -183,18 +183,18 @@ Type LLVMTypeConverter::convertIndexType(IndexType type) { } Type LLVMTypeConverter::convertIntegerType(IntegerType type) { - return LLVM::LLVMType::getIntNTy(&getContext(), type.getWidth()); + return LLVM::LLVMIntegerType::get(&getContext(), type.getWidth()); } Type LLVMTypeConverter::convertFloatType(FloatType type) { if (type.isa()) - return LLVM::LLVMType::getFloatTy(&getContext()); + return LLVM::LLVMFloatType::get(&getContext()); if (type.isa()) - return LLVM::LLVMType::getDoubleTy(&getContext()); + return LLVM::LLVMDoubleType::get(&getContext()); if (type.isa()) - return LLVM::LLVMType::getHalfTy(&getContext()); + return LLVM::LLVMHalfType::get(&getContext()); if (type.isa()) - return LLVM::LLVMType::getBFloatTy(&getContext()); + return LLVM::LLVMBFloatType::get(&getContext()); llvm_unreachable("non-float type in convertFloatType"); } @@ -206,7 +206,8 @@ static constexpr unsigned kRealPosInComplexNumberStruct = 0; static constexpr unsigned kImaginaryPosInComplexNumberStruct = 1; Type LLVMTypeConverter::convertComplexType(ComplexType type) { auto elementType = convertType(type.getElementType()).cast(); - return LLVM::LLVMType::getStructTy(&getContext(), {elementType, elementType}); + return LLVM::LLVMStructType::getLiteral(&getContext(), + {elementType, elementType}); } // Except for signatures, MLIR function types are converted into LLVM @@ -249,11 +250,11 @@ LLVM::LLVMType LLVMTypeConverter::convertFunctionSignature( // a struct. LLVM::LLVMType resultType = funcTy.getNumResults() == 0 - ? LLVM::LLVMType::getVoidTy(&getContext()) + ? 
LLVM::LLVMVoidType::get(&getContext()) : unwrap(packFunctionResults(funcTy.getResults())); if (!resultType) return {}; - return LLVM::LLVMType::getFunctionTy(resultType, argTypes, isVariadic); + return LLVM::LLVMFunctionType::get(resultType, argTypes, isVariadic); } /// Converts the function type to a C-compatible format, in particular using @@ -273,12 +274,12 @@ LLVMTypeConverter::convertFunctionTypeCWrapper(FunctionType type) { LLVM::LLVMType resultType = type.getNumResults() == 0 - ? LLVM::LLVMType::getVoidTy(&getContext()) + ? LLVM::LLVMVoidType::get(&getContext()) : unwrap(packFunctionResults(type.getResults())); if (!resultType) return {}; - return LLVM::LLVMType::getFunctionTy(resultType, inputs, false); + return LLVM::LLVMFunctionType::get(resultType, inputs); } static constexpr unsigned kAllocatedPtrPosInMemRefDescriptor = 0; @@ -335,7 +336,7 @@ LLVMTypeConverter::getMemRefDescriptorFields(MemRefType type, if (unpackAggregates) results.insert(results.end(), 2 * rank, indexTy); else - results.insert(results.end(), 2, LLVM::LLVMType::getArrayTy(indexTy, rank)); + results.insert(results.end(), 2, LLVM::LLVMArrayType::get(indexTy, rank)); return results; } @@ -346,7 +347,7 @@ Type LLVMTypeConverter::convertMemRefType(MemRefType type) { // unpack the `sizes` and `strides` arrays. SmallVector types = getMemRefDescriptorFields(type, /*unpackAggregates=*/false); - return LLVM::LLVMType::getStructTy(&getContext(), types); + return LLVM::LLVMStructType::getLiteral(&getContext(), types); } static constexpr unsigned kRankInUnrankedMemRefDescriptor = 0; @@ -361,12 +362,13 @@ static constexpr unsigned kPtrInUnrankedMemRefDescriptor = 1; /// be unranked. SmallVector LLVMTypeConverter::getUnrankedMemRefDescriptorFields() { - return {getIndexType(), LLVM::LLVMType::getInt8PtrTy(&getContext())}; + return {getIndexType(), LLVM::LLVMPointerType::get( + LLVM::LLVMIntegerType::get(&getContext(), 8))}; } Type LLVMTypeConverter::convertUnrankedMemRefType(UnrankedMemRefType type) { - return LLVM::LLVMType::getStructTy(&getContext(), - getUnrankedMemRefDescriptorFields()); + return LLVM::LLVMStructType::getLiteral(&getContext(), + getUnrankedMemRefDescriptorFields()); } /// Convert a memref type to a bare pointer to the memref element type. @@ -407,11 +409,11 @@ Type LLVMTypeConverter::convertVectorType(VectorType type) { auto elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - auto vectorType = - LLVM::LLVMType::getVectorTy(elementType, type.getShape().back()); + LLVM::LLVMType vectorType = + LLVM::LLVMFixedVectorType::get(elementType, type.getShape().back()); auto shape = type.getShape(); for (int i = shape.size() - 2; i >= 0; --i) - vectorType = LLVM::LLVMType::getArrayTy(vectorType, shape[i]); + vectorType = LLVM::LLVMArrayType::get(vectorType, shape[i]); return vectorType; } @@ -620,7 +622,7 @@ Value MemRefDescriptor::size(OpBuilder &builder, Location loc, Value pos, int64_t rank) { auto indexTy = indexType.cast(); auto indexPtrTy = LLVM::LLVMPointerType::get(indexTy); - auto arrayTy = LLVM::LLVMType::getArrayTy(indexTy, rank); + auto arrayTy = LLVM::LLVMArrayType::get(indexTy, rank); auto arrayPtrTy = LLVM::LLVMPointerType::get(arrayTy); // Copy size values to stack-allocated memory. 
@@ -949,8 +951,9 @@ Value UnrankedMemRefDescriptor::sizeBasePtr( Value memRefDescPtr, LLVM::LLVMPointerType elemPtrPtrType) { LLVM::LLVMType elemPtrTy = elemPtrPtrType.getElementType(); LLVM::LLVMType indexTy = typeConverter.getIndexType(); - LLVM::LLVMType structPtrTy = LLVM::LLVMPointerType::get( - LLVM::LLVMType::getStructTy(elemPtrTy, elemPtrTy, indexTy, indexTy)); + LLVM::LLVMType structPtrTy = + LLVM::LLVMPointerType::get(LLVM::LLVMStructType::getLiteral( + indexTy.getContext(), {elemPtrTy, elemPtrTy, indexTy, indexTy})); Value structPtr = builder.create(loc, structPtrTy, memRefDescPtr); @@ -1031,17 +1034,18 @@ LLVM::LLVMType ConvertToLLVMPattern::getIndexType() const { LLVM::LLVMType ConvertToLLVMPattern::getIntPtrType(unsigned addressSpace) const { - return LLVM::LLVMType::getIntNTy( + return LLVM::LLVMIntegerType::get( &getTypeConverter()->getContext(), getTypeConverter()->getPointerBitwidth(addressSpace)); } LLVM::LLVMType ConvertToLLVMPattern::getVoidType() const { - return LLVM::LLVMType::getVoidTy(&getTypeConverter()->getContext()); + return LLVM::LLVMVoidType::get(&getTypeConverter()->getContext()); } LLVM::LLVMType ConvertToLLVMPattern::getVoidPtrType() const { - return LLVM::LLVMType::getInt8PtrTy(&getTypeConverter()->getContext()); + return LLVM::LLVMPointerType::get( + LLVM::LLVMIntegerType::get(&getTypeConverter()->getContext(), 8)); } Value ConvertToLLVMPattern::createIndexConstant( @@ -1724,8 +1728,7 @@ struct AssertOpLowering : public ConvertOpToLLVMPattern { if (!abortFunc) { OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(module.getBody()); - auto abortFuncTy = - LLVM::LLVMType::getFunctionTy(getVoidType(), {}, /*isVarArg=*/false); + auto abortFuncTy = LLVM::LLVMFunctionType::get(getVoidType(), {}); abortFunc = rewriter.create(rewriter.getUnknownLoc(), "abort", abortFuncTy); } @@ -1950,8 +1953,7 @@ struct AllocLikeOpLowering : public ConvertToLLVMPattern { for (Value param : params) paramTypes.push_back(param.getType().cast()); auto allocFuncType = - LLVM::LLVMType::getFunctionTy(getVoidPtrType(), paramTypes, - /*isVarArg=*/false); + LLVM::LLVMFunctionType::get(getVoidPtrType(), paramTypes); OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(module.getBody()); allocFuncOp = rewriter.create(rewriter.getUnknownLoc(), @@ -2203,9 +2205,10 @@ static LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, // Get frequently used types. MLIRContext *context = builder.getContext(); - auto voidType = LLVM::LLVMType::getVoidTy(context); - auto voidPtrType = LLVM::LLVMType::getInt8PtrTy(context); - auto i1Type = LLVM::LLVMType::getInt1Ty(context); + auto voidType = LLVM::LLVMVoidType::get(context); + LLVM::LLVMType voidPtrType = + LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(context, 8)); + auto i1Type = LLVM::LLVMIntegerType::get(context, 1); LLVM::LLVMType indexType = typeConverter.getIndexType(); // Find the malloc and free, or declare them if necessary. 
@@ -2216,8 +2219,8 @@ static LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, builder.setInsertionPointToStart(module.getBody()); mallocFunc = builder.create( builder.getUnknownLoc(), "malloc", - LLVM::LLVMType::getFunctionTy( - voidPtrType, llvm::makeArrayRef(indexType), /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(voidPtrType, llvm::makeArrayRef(indexType), + /*isVarArg=*/false)); } auto freeFunc = module.lookupSymbol("free"); if (!freeFunc && !toDynamic) { @@ -2225,8 +2228,8 @@ static LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, builder.setInsertionPointToStart(module.getBody()); freeFunc = builder.create( builder.getUnknownLoc(), "free", - LLVM::LLVMType::getFunctionTy(voidType, llvm::makeArrayRef(voidPtrType), - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(voidType, llvm::makeArrayRef(voidPtrType), + /*isVarArg=*/false)); } // Initialize shared constants. @@ -2372,8 +2375,7 @@ struct DeallocOpLowering : public ConvertOpToLLVMPattern { op->getParentOfType().getBody()); freeFunc = rewriter.create( rewriter.getUnknownLoc(), "free", - LLVM::LLVMType::getFunctionTy(getVoidType(), getVoidPtrType(), - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(getVoidType(), getVoidPtrType())); } MemRefDescriptor memref(transformed.memref()); @@ -2400,7 +2402,7 @@ convertGlobalMemrefTypeToLLVM(MemRefType type, LLVM::LLVMType arrayTy = elementType; // Shape has the outermost dim at index 0, so need to walk it backwards for (int64_t dim : llvm::reverse(type.getShape())) - arrayTy = LLVM::LLVMType::getArrayTy(arrayTy, dim); + arrayTy = LLVM::LLVMArrayType::get(arrayTy, dim); return arrayTy; } @@ -2855,7 +2857,7 @@ struct MemRefReshapeOpLowering Value zeroIndex = createIndexConstant(rewriter, loc, 0); Value pred = rewriter.create( - loc, LLVM::LLVMType::getInt1Ty(rewriter.getContext()), + loc, LLVM::LLVMIntegerType::get(rewriter.getContext(), 1), LLVM::ICmpPredicate::sge, indexArg, zeroIndex); Block *bodyBlock = @@ -3889,8 +3891,9 @@ struct GenericAtomicRMWOpLowering // Append the cmpxchg op to the end of the loop block. auto successOrdering = LLVM::AtomicOrdering::acq_rel; auto failureOrdering = LLVM::AtomicOrdering::monotonic; - auto boolType = LLVM::LLVMType::getInt1Ty(rewriter.getContext()); - auto pairType = LLVM::LLVMType::getStructTy(valueType, boolType); + auto boolType = LLVM::LLVMIntegerType::get(rewriter.getContext(), 1); + auto pairType = LLVM::LLVMStructType::getLiteral(rewriter.getContext(), + {valueType, boolType}); auto cmpxchg = rewriter.create( loc, pairType, dataPtr, loopArgument, result, successOrdering, failureOrdering); @@ -4067,13 +4070,13 @@ Type LLVMTypeConverter::packFunctionResults(ArrayRef types) { resultTypes.push_back(converted); } - return LLVM::LLVMType::getStructTy(&getContext(), resultTypes); + return LLVM::LLVMStructType::getLiteral(&getContext(), resultTypes); } Value LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, Value operand, OpBuilder &builder) { auto *context = builder.getContext(); - auto int64Ty = LLVM::LLVMType::getInt64Ty(builder.getContext()); + auto int64Ty = LLVM::LLVMIntegerType::get(builder.getContext(), 64); auto indexType = IndexType::get(context); // Alloca with proper alignment. We do not expect optimizations of this // alloca op and so we omit allocating at the entry block. 
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index bcc91e304e72d..b315417a420ba 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -209,7 +209,7 @@ static LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter, if (failed(getBase(rewriter, loc, memref, memRefType, base))) return failure(); auto pType = MemRefDescriptor(memref).getElementPtrType(); - auto ptrsType = LLVM::LLVMType::getVectorTy(pType, vType.getDimSize(0)); + auto ptrsType = LLVM::LLVMFixedVectorType::get(pType, vType.getDimSize(0)); ptrs = rewriter.create(loc, ptrsType, base, indices); return success(); } @@ -748,7 +748,7 @@ class VectorExtractOpConversion // Remaining extraction of element from 1-D LLVM vector auto position = positionAttrs.back().cast(); - auto i64Type = LLVM::LLVMType::getInt64Ty(rewriter.getContext()); + auto i64Type = LLVM::LLVMIntegerType::get(rewriter.getContext(), 64); auto constant = rewriter.create(loc, i64Type, position); extracted = rewriter.create(loc, extracted, constant); @@ -856,7 +856,7 @@ class VectorInsertOpConversion } // Insertion of an element into a 1-D LLVM vector. - auto i64Type = LLVM::LLVMType::getInt64Ty(rewriter.getContext()); + auto i64Type = LLVM::LLVMIntegerType::get(rewriter.getContext(), 64); auto constant = rewriter.create(loc, i64Type, position); Value inserted = rewriter.create( loc, typeConverter->convertType(oneDVectorType), extracted, @@ -1123,7 +1123,7 @@ class VectorTypeCastOpConversion })) return failure(); - auto int64Ty = LLVM::LLVMType::getInt64Ty(rewriter.getContext()); + auto int64Ty = LLVM::LLVMIntegerType::get(rewriter.getContext(), 64); // Create descriptor. auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy); @@ -1362,11 +1362,11 @@ class VectorPrintOpConversion : public ConvertOpToLLVMPattern { switch (conversion) { case PrintConversion::ZeroExt64: value = rewriter.create( - loc, value, LLVM::LLVMType::getInt64Ty(rewriter.getContext())); + loc, value, LLVM::LLVMIntegerType::get(rewriter.getContext(), 64)); break; case PrintConversion::SignExt64: value = rewriter.create( - loc, value, LLVM::LLVMType::getInt64Ty(rewriter.getContext())); + loc, value, LLVM::LLVMIntegerType::get(rewriter.getContext(), 64)); break; case PrintConversion::None: break; @@ -1410,27 +1410,25 @@ class VectorPrintOpConversion : public ConvertOpToLLVMPattern { OpBuilder moduleBuilder(module.getBodyRegion()); return moduleBuilder.create( op->getLoc(), name, - LLVM::LLVMType::getFunctionTy( - LLVM::LLVMType::getVoidTy(op->getContext()), params, - /*isVarArg=*/false)); + LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(op->getContext()), + params)); } // Helpers for method names. 
Operation *getPrintI64(Operation *op) const { return getPrint(op, "printI64", - LLVM::LLVMType::getInt64Ty(op->getContext())); + LLVM::LLVMIntegerType::get(op->getContext(), 64)); } Operation *getPrintU64(Operation *op) const { return getPrint(op, "printU64", - LLVM::LLVMType::getInt64Ty(op->getContext())); + LLVM::LLVMIntegerType::get(op->getContext(), 64)); } Operation *getPrintFloat(Operation *op) const { - return getPrint(op, "printF32", - LLVM::LLVMType::getFloatTy(op->getContext())); + return getPrint(op, "printF32", LLVM::LLVMFloatType::get(op->getContext())); } Operation *getPrintDouble(Operation *op) const { return getPrint(op, "printF64", - LLVM::LLVMType::getDoubleTy(op->getContext())); + LLVM::LLVMDoubleType::get(op->getContext())); } Operation *getPrintOpen(Operation *op) const { return getPrint(op, "printOpen", {}); diff --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp index 1335f33e10aa7..3e3ddc6aaff67 100644 --- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp +++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp @@ -121,7 +121,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { Type i64Ty = rewriter.getIntegerType(64); Value i64x2Ty = rewriter.create( loc, - LLVM::LLVMType::getVectorTy( + LLVM::LLVMFixedVectorType::get( toLLVMTy(i64Ty).template cast(), 2), constConfig); Value dataPtrAsI64 = rewriter.create( @@ -129,7 +129,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { Value zero = this->createIndexConstant(rewriter, loc, 0); Value dwordConfig = rewriter.create( loc, - LLVM::LLVMType::getVectorTy( + LLVM::LLVMFixedVectorType::get( toLLVMTy(i64Ty).template cast(), 2), i64x2Ty, dataPtrAsI64, zero); dwordConfig = diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 2bdbb877ec84c..765538ca7a537 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -101,12 +101,13 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { // The result type is either i1 or a vector type if the inputs are // vectors. 
- auto resultType = LLVMType::getInt1Ty(builder.getContext()); + LLVMType resultType = LLVMIntegerType::get(builder.getContext(), 1); auto argType = type.dyn_cast(); if (!argType) return parser.emitError(trailingTypeLoc, "expected LLVM IR dialect type"); if (auto vecArgType = argType.dyn_cast()) - resultType = LLVMType::getVectorTy(resultType, vecArgType.getNumElements()); + resultType = + LLVMFixedVectorType::get(resultType, vecArgType.getNumElements()); assert(!argType.isa() && "unhandled scalable vector"); @@ -547,7 +548,7 @@ static ParseResult parseInvokeOp(OpAsmParser &parser, OperationState &result) { LLVM::LLVMType llvmResultType; if (funcType.getNumResults() == 0) { - llvmResultType = LLVM::LLVMType::getVoidTy(builder.getContext()); + llvmResultType = LLVM::LLVMVoidType::get(builder.getContext()); } else { llvmResultType = funcType.getResult(0).dyn_cast(); if (!llvmResultType) @@ -565,8 +566,7 @@ static ParseResult parseInvokeOp(OpAsmParser &parser, OperationState &result) { "expected LLVM types as inputs"); } - auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, - /*isVarArg=*/false); + auto llvmFuncType = LLVM::LLVMFunctionType::get(llvmResultType, argTypes); auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = llvm::makeArrayRef(operands).drop_front(); @@ -827,7 +827,7 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { Builder &builder = parser.getBuilder(); LLVM::LLVMType llvmResultType; if (funcType.getNumResults() == 0) { - llvmResultType = LLVM::LLVMType::getVoidTy(builder.getContext()); + llvmResultType = LLVM::LLVMVoidType::get(builder.getContext()); } else { llvmResultType = funcType.getResult(0).dyn_cast(); if (!llvmResultType) @@ -844,8 +844,7 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { "expected LLVM types as inputs"); argTypes.push_back(argType); } - auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, - /*isVarArg=*/false); + auto llvmFuncType = LLVM::LLVMFunctionType::get(llvmResultType, argTypes); auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = @@ -1477,8 +1476,8 @@ static ParseResult parseGlobalOp(OpAsmParser &parser, OperationState &result) { if (types.empty()) { if (auto strAttr = value.dyn_cast_or_null()) { MLIRContext *context = parser.getBuilder().getContext(); - auto arrayType = LLVM::LLVMType::getArrayTy( - LLVM::LLVMType::getInt8Ty(context), strAttr.getValue().size()); + auto arrayType = LLVM::LLVMArrayType::get( + LLVM::LLVMIntegerType::get(context, 8), strAttr.getValue().size()); types.push_back(arrayType); } else { return parser.emitError(parser.getNameLoc(), @@ -1539,7 +1538,7 @@ void LLVM::ShuffleVectorOp::build(OpBuilder &b, OperationState &result, ArrayRef attrs) { auto containerType = v1.getType().cast(); auto vType = - LLVMType::getVectorTy(containerType.getElementType(), mask.size()); + LLVMFixedVectorType::get(containerType.getElementType(), mask.size()); build(b, result, vType, v1, v2, mask); result.addAttributes(attrs); } @@ -1574,7 +1573,7 @@ static ParseResult parseShuffleVectorOp(OpAsmParser &parser, return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); auto vType = - LLVMType::getVectorTy(containerType.getElementType(), maskAttr.size()); + LLVMFixedVectorType::get(containerType.getElementType(), maskAttr.size()); result.addTypes(vType); return success(); } @@ -1646,15 +1645,15 @@ static Type buildLLVMFunctionType(OpAsmParser 
&parser, llvm::SMLoc loc, } // No output is denoted as "void" in LLVM type system. - LLVMType llvmOutput = outputs.empty() ? LLVMType::getVoidTy(b.getContext()) + LLVMType llvmOutput = outputs.empty() ? LLVMVoidType::get(b.getContext()) : outputs.front().dyn_cast(); if (!llvmOutput) { parser.emitError(loc, "failed to construct function type: expected LLVM " "type for function results"); return {}; } - return LLVMType::getFunctionTy(llvmOutput, llvmInputs, - variadicFlag.isVariadic()); + return LLVMFunctionType::get(llvmOutput, llvmInputs, + variadicFlag.isVariadic()); } // Parses an LLVM function. @@ -1970,8 +1969,9 @@ static ParseResult parseAtomicCmpXchgOp(OpAsmParser &parser, parser.resolveOperand(val, type, result.operands)) return failure(); - auto boolType = LLVMType::getInt1Ty(builder.getContext()); - auto resultType = LLVMType::getStructTy(type, boolType); + auto boolType = LLVMIntegerType::get(builder.getContext(), 1); + auto resultType = + LLVMStructType::getLiteral(builder.getContext(), {type, boolType}); result.addTypes(resultType); return success(); @@ -2159,8 +2159,8 @@ Value mlir::LLVM::createGlobalString(Location loc, OpBuilder &builder, // Create the global at the entry of the module. OpBuilder moduleBuilder(module.getBodyRegion()); MLIRContext *ctx = builder.getContext(); - auto type = - LLVM::LLVMType::getArrayTy(LLVM::LLVMType::getInt8Ty(ctx), value.size()); + auto type = LLVM::LLVMArrayType::get(LLVM::LLVMIntegerType::get(ctx, 8), + value.size()); auto global = moduleBuilder.create( loc, type, /*isConstant=*/true, linkage, name, builder.getStringAttr(value)); @@ -2168,10 +2168,11 @@ Value mlir::LLVM::createGlobalString(Location loc, OpBuilder &builder, // Get the pointer to the first character in the global string. Value globalPtr = builder.create(loc, global); Value cst0 = builder.create( - loc, LLVM::LLVMType::getInt64Ty(ctx), + loc, LLVM::LLVMIntegerType::get(ctx, 64), builder.getIntegerAttr(builder.getIndexType(), 0)); - return builder.create(loc, LLVM::LLVMType::getInt8PtrTy(ctx), - globalPtr, ValueRange{cst0, cst0}); + return builder.create( + loc, LLVM::LLVMPointerType::get(LLVMIntegerType::get(ctx, 8)), globalPtr, + ValueRange{cst0, cst0}); } bool mlir::LLVM::satisfiesLLVMModule(Operation *op) { diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index 0616efb7ef3f9..3d75245a1fb36 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -36,106 +36,9 @@ LLVMDialect &LLVMType::getDialect() { return static_cast(Type::getDialect()); } -//----------------------------------------------------------------------------// -// Utilities used to generate floating point types. - -LLVMType LLVMType::getDoubleTy(MLIRContext *context) { - return LLVMDoubleType::get(context); -} - -LLVMType LLVMType::getFloatTy(MLIRContext *context) { - return LLVMFloatType::get(context); -} - -LLVMType LLVMType::getBFloatTy(MLIRContext *context) { - return LLVMBFloatType::get(context); -} - -LLVMType LLVMType::getHalfTy(MLIRContext *context) { - return LLVMHalfType::get(context); -} - -LLVMType LLVMType::getFP128Ty(MLIRContext *context) { - return LLVMFP128Type::get(context); -} - -LLVMType LLVMType::getX86_FP80Ty(MLIRContext *context) { - return LLVMX86FP80Type::get(context); -} - -//----------------------------------------------------------------------------// -// Utilities used to generate integer types. 
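Note for readers following this migration: the `LLVMType::get*Ty` convenience helpers being deleted here map one-to-one onto direct `::get` calls on the type subclasses, exactly as the call sites updated earlier in this patch show. A minimal sketch of the mapping (the header path and the standalone function are assumptions for illustration; they are not part of the patch):

```cpp
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"

// Mapping from the removed LLVMType::get*Ty helpers to the direct type
// classes used throughout this patch:
//   LLVMType::getInt64Ty(ctx)        ->  LLVMIntegerType::get(ctx, 64)
//   LLVMType::getVoidTy(ctx)         ->  LLVMVoidType::get(ctx)
//   LLVMType::getVectorTy(ty, n)     ->  LLVMFixedVectorType::get(ty, n)
//   LLVMType::getFunctionTy(res, params, /*isVarArg=*/false)
//                                    ->  LLVMFunctionType::get(res, params)
//   LLVMType::getInt8PtrTy(ctx)      ->  LLVMPointerType::get(
//                                            LLVMIntegerType::get(ctx, 8))
// Example: the i8* type spelled out with the new API.
static mlir::LLVM::LLVMType buildI8Ptr(mlir::MLIRContext *ctx) {
  using namespace mlir::LLVM;
  return LLVMPointerType::get(LLVMIntegerType::get(ctx, 8));
}
```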
- -LLVMType LLVMType::getIntNTy(MLIRContext *context, unsigned numBits) { - return LLVMIntegerType::get(context, numBits); -} - -LLVMType LLVMType::getInt8PtrTy(MLIRContext *context) { - return LLVMPointerType::get(LLVMIntegerType::get(context, 8)); -} - -//----------------------------------------------------------------------------// -// Utilities used to generate other miscellaneous types. - -LLVMType LLVMType::getArrayTy(LLVMType elementType, uint64_t numElements) { - return LLVMArrayType::get(elementType, numElements); -} - -LLVMType LLVMType::getFunctionTy(LLVMType result, ArrayRef params, - bool isVarArg) { - return LLVMFunctionType::get(result, params, isVarArg); -} - -LLVMType LLVMType::getStructTy(MLIRContext *context, - ArrayRef elements, bool isPacked) { - return LLVMStructType::getLiteral(context, elements, isPacked); -} - -LLVMType LLVMType::getVectorTy(LLVMType elementType, unsigned numElements) { - return LLVMFixedVectorType::get(elementType, numElements); -} - -//----------------------------------------------------------------------------// -// Void type utilities. - -LLVMType LLVMType::getVoidTy(MLIRContext *context) { - return LLVMVoidType::get(context); -} - -//----------------------------------------------------------------------------// -// Creation and setting of LLVM's identified struct types - -LLVMType LLVMType::createStructTy(MLIRContext *context, - ArrayRef elements, - Optional name, bool isPacked) { - assert(name.hasValue() && - "identified structs with no identifier not supported"); - StringRef stringNameBase = name.getValueOr(""); - std::string stringName = stringNameBase.str(); - unsigned counter = 0; - do { - auto type = LLVMStructType::getIdentified(context, stringName); - if (type.isInitialized() || failed(type.setBody(elements, isPacked))) { - counter += 1; - stringName = - (Twine(stringNameBase) + "." + std::to_string(counter)).str(); - continue; - } - return type; - } while (true); -} - -LLVMType LLVMType::setStructTyBody(LLVMType structType, - ArrayRef elements, bool isPacked) { - LogicalResult couldSet = - structType.cast().setBody(elements, isPacked); - assert(succeeded(couldSet) && "failed to set the body"); - (void)couldSet; - return structType; -} - //===----------------------------------------------------------------------===// // Array type. +//===----------------------------------------------------------------------===// bool LLVMArrayType::isValidElementType(LLVMType type) { return !type.isa(); @@ -222,6 +126,7 @@ LogicalResult LLVMFunctionType::verifyConstructionInvariants( //===----------------------------------------------------------------------===// // Integer type. +//===----------------------------------------------------------------------===// LLVMIntegerType LLVMIntegerType::get(MLIRContext *ctx, unsigned bitwidth) { return Base::get(ctx, bitwidth); @@ -243,6 +148,7 @@ LogicalResult LLVMIntegerType::verifyConstructionInvariants(Location loc, //===----------------------------------------------------------------------===// // Pointer type. +//===----------------------------------------------------------------------===// bool LLVMPointerType::isValidElementType(LLVMType type) { return !type.isa elements, + bool isPacked) { + std::string stringName = name.str(); + unsigned counter = 0; + do { + auto type = LLVMStructType::getIdentified(context, stringName); + if (type.isInitialized() || failed(type.setBody(elements, isPacked))) { + counter += 1; + stringName = (Twine(name) + "." 
+ std::to_string(counter)).str(); + continue; + } + return type; + } while (true); +} + LLVMStructType LLVMStructType::getLiteral(MLIRContext *context, ArrayRef types, bool isPacked) { @@ -346,6 +270,7 @@ LLVMStructType::verifyConstructionInvariants(Location loc, //===----------------------------------------------------------------------===// // Vector types. +//===----------------------------------------------------------------------===// bool LLVMVectorType::isValidElementType(LLVMType type) { return type.isa() || diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index c202075fa2066..c2f689be493aa 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -63,7 +63,8 @@ static ParseResult parseNVVMShflSyncBflyOp(OpAsmParser &parser, break; } - auto int32Ty = LLVM::LLVMType::getInt32Ty(parser.getBuilder().getContext()); + auto int32Ty = + LLVM::LLVMIntegerType::get(parser.getBuilder().getContext(), 32); return parser.resolveOperands(ops, {int32Ty, type, int32Ty, int32Ty}, parser.getNameLoc(), result.operands); } @@ -72,8 +73,8 @@ static ParseResult parseNVVMShflSyncBflyOp(OpAsmParser &parser, static ParseResult parseNVVMVoteBallotOp(OpAsmParser &parser, OperationState &result) { MLIRContext *context = parser.getBuilder().getContext(); - auto int32Ty = LLVM::LLVMType::getInt32Ty(context); - auto int1Ty = LLVM::LLVMType::getInt1Ty(context); + auto int32Ty = LLVM::LLVMIntegerType::get(context, 32); + auto int1Ty = LLVM::LLVMIntegerType::get(context, 1); SmallVector ops; Type type; @@ -87,12 +88,12 @@ static ParseResult parseNVVMVoteBallotOp(OpAsmParser &parser, static LogicalResult verify(MmaOp op) { MLIRContext *context = op.getContext(); - auto f16Ty = LLVM::LLVMType::getHalfTy(context); - auto f16x2Ty = LLVM::LLVMType::getVectorTy(f16Ty, 2); - auto f32Ty = LLVM::LLVMType::getFloatTy(context); - auto f16x2x4StructTy = LLVM::LLVMType::getStructTy( + auto f16Ty = LLVM::LLVMHalfType::get(context); + auto f16x2Ty = LLVM::LLVMFixedVectorType::get(f16Ty, 2); + auto f32Ty = LLVM::LLVMFloatType::get(context); + auto f16x2x4StructTy = LLVM::LLVMStructType::getLiteral( context, {f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty}); - auto f32x8StructTy = LLVM::LLVMType::getStructTy( + auto f32x8StructTy = LLVM::LLVMStructType::getLiteral( context, {f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty}); SmallVector operand_types(op.getOperandTypes().begin(), diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp index 8d0c96ce2aa1a..f50c49f03a079 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp @@ -46,9 +46,9 @@ static ParseResult parseROCDLMubufLoadOp(OpAsmParser &parser, return failure(); MLIRContext *context = parser.getBuilder().getContext(); - auto int32Ty = LLVM::LLVMType::getInt32Ty(context); - auto int1Ty = LLVM::LLVMType::getInt1Ty(context); - auto i32x4Ty = LLVM::LLVMType::getVectorTy(int32Ty, 4); + auto int32Ty = LLVM::LLVMIntegerType::get(context, 32); + auto int1Ty = LLVM::LLVMIntegerType::get(context, 1); + auto i32x4Ty = LLVM::LLVMFixedVectorType::get(int32Ty, 4); return parser.resolveOperands(ops, {i32x4Ty, int32Ty, int32Ty, int1Ty, int1Ty}, parser.getNameLoc(), result.operands); @@ -65,9 +65,9 @@ static ParseResult parseROCDLMubufStoreOp(OpAsmParser &parser, return failure(); MLIRContext *context = parser.getBuilder().getContext(); - auto int32Ty = LLVM::LLVMType::getInt32Ty(context); - auto 
int1Ty = LLVM::LLVMType::getInt1Ty(context); - auto i32x4Ty = LLVM::LLVMType::getVectorTy(int32Ty, 4); + auto int32Ty = LLVM::LLVMIntegerType::get(context, 32); + auto int1Ty = LLVM::LLVMIntegerType::get(context, 1); + auto i32x4Ty = LLVM::LLVMFixedVectorType::get(int32Ty, 4); if (parser.resolveOperands(ops, {type, i32x4Ty, int32Ty, int32Ty, int1Ty, int1Ty}, diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 0b2cf7de270fa..c28588d32ad6c 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -769,13 +769,12 @@ LogicalResult ModuleTranslation::convertOperation(Operation &opInst, LLVM::LLVMType resultType; if (inlineAsmOp.getNumResults() == 0) { - resultType = LLVM::LLVMType::getVoidTy(mlirModule->getContext()); + resultType = LLVM::LLVMVoidType::get(mlirModule->getContext()); } else { assert(inlineAsmOp.getNumResults() == 1); resultType = inlineAsmOp.getResultTypes()[0].cast(); } - auto ft = LLVM::LLVMType::getFunctionTy(resultType, operandTypes, - /*isVarArg=*/false); + auto ft = LLVM::LLVMFunctionType::get(resultType, operandTypes); llvm::InlineAsm *inlineAsmInst = inlineAsmOp.asm_dialect().hasValue() ? llvm::InlineAsm::get( diff --git a/mlir/test/lib/Transforms/TestConvertCallOp.cpp b/mlir/test/lib/Transforms/TestConvertCallOp.cpp index 61062c7938fe2..82cc95aac8a82 100644 --- a/mlir/test/lib/Transforms/TestConvertCallOp.cpp +++ b/mlir/test/lib/Transforms/TestConvertCallOp.cpp @@ -45,7 +45,8 @@ class TestConvertCallOp // Populate type conversions. LLVMTypeConverter type_converter(m.getContext()); type_converter.addConversion([&](test::TestType type) { - return LLVM::LLVMType::getInt8PtrTy(m.getContext()); + return LLVM::LLVMPointerType::get( + LLVM::LLVMIntegerType::get(m.getContext(), 8)); }); // Populate patterns. From c3acda0798f9b10ac3187ad941bbd8af82fb84a1 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 23 Dec 2020 13:27:55 +0100 Subject: [PATCH 168/378] [VE] Vector 'and' isel and tests Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D93709 --- llvm/lib/Target/VE/VVPInstrInfo.td | 3 + llvm/lib/Target/VE/VVPInstrPatternsVec.td | 3 + llvm/lib/Target/VE/VVPNodes.def | 1 + llvm/test/CodeGen/VE/Vector/vec_and.ll | 132 ++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 llvm/test/CodeGen/VE/Vector/vec_and.ll diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td index 81fbfe03b48f3..2c88d5099a7b6 100644 --- a/llvm/lib/Target/VE/VVPInstrInfo.td +++ b/llvm/lib/Target/VE/VVPInstrInfo.td @@ -40,4 +40,7 @@ class vvp_commutative : def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>; def c_vvp_add : vvp_commutative; +def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>; +def c_vvp_and : vvp_commutative; + // } Binary Operators diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td index 2345173314a4a..7003fb387670c 100644 --- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td +++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td @@ -66,3 +66,6 @@ multiclass VectorBinaryArith_ShortLong< defm : VectorBinaryArith_ShortLong; +defm : VectorBinaryArith_ShortLong; diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def index 4319b332388e1..1f9cbd7902351 100644 --- a/llvm/lib/Target/VE/VVPNodes.def +++ b/llvm/lib/Target/VE/VVPNodes.def @@ -27,6 +27,7 @@ // Integer arithmetic. 
ADD_BINARY_VVP_OP(VVP_ADD,ADD) +ADD_BINARY_VVP_OP(VVP_AND,AND) #undef ADD_BINARY_VVP_OP #undef ADD_VVP_OP diff --git a/llvm/test/CodeGen/VE/Vector/vec_and.ll b/llvm/test/CodeGen/VE/Vector/vec_and.ll new file mode 100644 index 0000000000000..8597e1aa511ed --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vec_and.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +; <256 x i32> + +; Function Attrs: nounwind +define fastcc <256 x i32> @and_vv_v256i32(<256 x i32> %x, <256 x i32> %y) { +; CHECK-LABEL: and_vv_v256i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v0, %v0, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %z = and <256 x i32> %x, %y + ret <256 x i32> %z +} + +; Function Attrs: nounwind +define fastcc <256 x i32> @and_sv_v256i32(i32 %x, <256 x i32> %y) { +; CHECK-LABEL: and_sv_v256i32: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvand.lo %v0, %s0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x i32> undef, i32 %x, i32 0 + %vx = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer + %z = and <256 x i32> %vx, %y + ret <256 x i32> %z +} + +; Function Attrs: nounwind +define fastcc <256 x i32> @and_vs_v256i32(<256 x i32> %x, i32 %y) { +; CHECK-LABEL: and_vs_v256i32: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvand.lo %v0, %s0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x i32> undef, i32 %y, i32 0 + %vy = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer + %z = and <256 x i32> %x, %vy + ret <256 x i32> %z +} + + + +; <256 x i64> + +; Function Attrs: nounwind +define fastcc <256 x i64> @and_vv_v256i64(<256 x i64> %x, <256 x i64> %y) { +; CHECK-LABEL: and_vv_v256i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vand %v0, %v0, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %z = and <256 x i64> %x, %y + ret <256 x i64> %z +} + +; Function Attrs: nounwind +define fastcc <256 x i64> @and_sv_v256i64(i64 %x, <256 x i64> %y) { +; CHECK-LABEL: and_sv_v256i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vand %v0, %s0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x i64> undef, i64 %x, i32 0 + %vx = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer + %z = and <256 x i64> %vx, %y + ret <256 x i64> %z +} + +; Function Attrs: nounwind +define fastcc <256 x i64> @and_vs_v256i64(<256 x i64> %x, i64 %y) { +; CHECK-LABEL: and_vs_v256i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vand %v0, %s0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x i64> undef, i64 %y, i32 0 + %vy = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer + %z = and <256 x i64> %x, %vy + ret <256 x i64> %z +} + +; <128 x i64> +; We expect this to be widened. + +; Function Attrs: nounwind +define fastcc <128 x i64> @and_vv_v128i64(<128 x i64> %x, <128 x i64> %y) { +; CHECK-LABEL: and_vv_v128i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vand %v0, %v0, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %z = and <128 x i64> %x, %y + ret <128 x i64> %z +} + +; <256 x i16> +; We expect promotion. 
+ +; Function Attrs: nounwind +define fastcc <256 x i16> @and_vv_v256i16(<256 x i16> %x, <256 x i16> %y) { +; CHECK-LABEL: and_vv_v256i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v0, %v0, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %z = and <256 x i16> %x, %y + ret <256 x i16> %z +} + +; <128 x i16> +; We expect this to be scalarized (for now). + +; Function Attrs: nounwind +define fastcc <128 x i16> @and_vv_v128i16(<128 x i16> %x, <128 x i16> %y) { +; CHECK-LABEL: and_vv_v128i16: +; CHECK-NOT: vand + %z = and <128 x i16> %x, %y + ret <128 x i16> %z +} + From acaa6e4260cb5b2aa88f465eafea320d5f3f249c Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 23 Dec 2020 14:21:41 +0100 Subject: [PATCH 169/378] [NFC] Uniquify 'const' in TargetTransformInfoImpl.h Some member functions of class TargetTransformInfoImplBase in TargetTransformInfoImpl.h are marked const while others are not. Yet all of the should be marked const since they are just providing default TTI values. This patch fixes the inconsistency. Authored-by: Jinzheng Tu Reviewed By: simoll Differential revision: https://reviews.llvm.org/D93573 --- .../llvm/Analysis/TargetTransformInfoImpl.h | 196 ++++++++++-------- 1 file changed, 109 insertions(+), 87 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 6415e7bfe7c30..620bfb885b54b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -46,7 +46,7 @@ class TargetTransformInfoImplBase { int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. 
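As a concrete instance of the inconsistency this NFC change removes, one of the default hooks touched further down in this diff looks as follows (excerpted from the hunks; the class wrapper is shown only to keep the snippet compilable):

```cpp
// Excerpted from the hunks in this patch (surrounding members elided):
class TargetTransformInfoImplBase {
public:
  // Before: default hook, not const-qualified.
  //   bool useAA() { return false; }
  // After: const-qualified like the other default implementations.
  bool useAA() const { return false; }
};
```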
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -59,28 +59,30 @@ class TargetTransformInfoImplBase { unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { + BlockFrequencyInfo *BFI) const { (void)PSI; (void)BFI; JTSize = 0; return SI.getNumCases(); } - unsigned getInliningThresholdMultiplier() { return 1; } + unsigned getInliningThresholdMultiplier() const { return 1; } - int getInlinerVectorBonusPercent() { return 150; } + int getInlinerVectorBonusPercent() const { return 150; } - unsigned getMemcpyCost(const Instruction *I) { return TTI::TCC_Expensive; } + unsigned getMemcpyCost(const Instruction *I) const { + return TTI::TCC_Expensive; + } - bool hasBranchDivergence() { return false; } + bool hasBranchDivergence() const { return false; } - bool useGPUDivergenceAnalysis() { return false; } + bool useGPUDivergenceAnalysis() const { return false; } - bool isSourceOfDivergence(const Value *V) { return false; } + bool isSourceOfDivergence(const Value *V) const { return false; } - bool isAlwaysUniform(const Value *V) { return false; } + bool isAlwaysUniform(const Value *V) const { return false; } - unsigned getFlatAddressSpace() { return -1; } + unsigned getFlatAddressSpace() const { return -1; } bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { @@ -96,7 +98,7 @@ class TargetTransformInfoImplBase { return nullptr; } - bool isLoweredToCall(const Function *F) { + bool isLoweredToCall(const Function *F) const { assert(F && "A concrete function must be provided to this routine."); // FIXME: These should almost certainly not be handled here, and instead @@ -134,7 +136,7 @@ class TargetTransformInfoImplBase { bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - HardwareLoopInfo &HWLoopInfo) { + HardwareLoopInfo &HWLoopInfo) const { return false; } @@ -170,39 +172,39 @@ class TargetTransformInfoImplBase { } void getUnrollingPreferences(Loop *, ScalarEvolution &, - TTI::UnrollingPreferences &) {} + TTI::UnrollingPreferences &) const {} void getPeelingPreferences(Loop *, ScalarEvolution &, - TTI::PeelingPreferences &) {} + TTI::PeelingPreferences &) const {} - bool isLegalAddImmediate(int64_t Imm) { return false; } + bool isLegalAddImmediate(int64_t Imm) const { return false; } - bool isLegalICmpImmediate(int64_t Imm) { return false; } + bool isLegalICmpImmediate(int64_t Imm) const { return false; } bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, - Instruction *I = nullptr) { + Instruction *I = nullptr) const { // Guess that only reg and reg+reg addressing is allowed. This heuristic is // taken from the implementation of LSR. 
return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } - bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { + bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const { return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) < std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost); } - bool isNumRegsMajorCostOfLSR() { return true; } + bool isNumRegsMajorCostOfLSR() const { return true; } - bool isProfitableLSRChainElement(Instruction *I) { return false; } + bool isProfitableLSRChainElement(Instruction *I) const { return false; } - bool canMacroFuseCmp() { return false; } + bool canMacroFuseCmp() const { return false; } bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *LibInfo) { + TargetLibraryInfo *LibInfo) const { return false; } @@ -210,40 +212,51 @@ class TargetTransformInfoImplBase { bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } - bool isLegalMaskedStore(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedStore(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalMaskedLoad(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedLoad(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalNTStore(Type *DataType, Align Alignment) { + bool isLegalNTStore(Type *DataType, Align Alignment) const { // By default, assume nontemporal memory stores are available for stores // that are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); return Alignment >= DataSize && isPowerOf2_32(DataSize); } - bool isLegalNTLoad(Type *DataType, Align Alignment) { + bool isLegalNTLoad(Type *DataType, Align Alignment) const { // By default, assume nontemporal memory loads are available for loads that // are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); return Alignment >= DataSize && isPowerOf2_32(DataSize); } - bool isLegalMaskedScatter(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedScatter(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalMaskedGather(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedGather(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalMaskedCompressStore(Type *DataType) { return false; } + bool isLegalMaskedCompressStore(Type *DataType) const { return false; } - bool isLegalMaskedExpandLoad(Type *DataType) { return false; } + bool isLegalMaskedExpandLoad(Type *DataType) const { return false; } - bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } + bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; } - bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { + return false; + } - bool prefersVectorizedAddressing() { return true; } + bool prefersVectorizedAddressing() const { return true; } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace) const { // Guess that all legal addressing mode are free. 
if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace)) @@ -251,83 +264,87 @@ class TargetTransformInfoImplBase { return -1; } - bool LSRWithInstrQueries() { return false; } + bool LSRWithInstrQueries() const { return false; } - bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } + bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; } - bool isProfitableToHoist(Instruction *I) { return true; } + bool isProfitableToHoist(Instruction *I) const { return true; } - bool useAA() { return false; } + bool useAA() const { return false; } - bool isTypeLegal(Type *Ty) { return false; } + bool isTypeLegal(Type *Ty) const { return false; } - unsigned getRegUsageForType(Type *Ty) { return 1; } + unsigned getRegUsageForType(Type *Ty) const { return 1; } - bool shouldBuildLookupTables() { return true; } - bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } + bool shouldBuildLookupTables() const { return true; } + bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; } - bool useColdCCForColdCall(Function &F) { return false; } + bool useColdCCForColdCall(Function &F) const { return false; } unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, - bool Insert, bool Extract) { + bool Insert, bool Extract) const { return 0; } unsigned getOperandsScalarizationOverhead(ArrayRef Args, - unsigned VF) { + unsigned VF) const { return 0; } - bool supportsEfficientVectorElementLoadStore() { return false; } + bool supportsEfficientVectorElementLoadStore() const { return false; } - bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } + bool enableAggressiveInterleaving(bool LoopHasReductions) const { + return false; + } TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { return {}; } - bool enableInterleavedAccessVectorization() { return false; } + bool enableInterleavedAccessVectorization() const { return false; } - bool enableMaskedInterleavedAccessVectorization() { return false; } + bool enableMaskedInterleavedAccessVectorization() const { return false; } - bool isFPVectorizationPotentiallyUnsafe() { return false; } + bool isFPVectorizationPotentiallyUnsafe() const { return false; } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, - bool *Fast) { + bool *Fast) const { return false; } - TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { return TTI::PSK_Software; } - bool haveFastSqrt(Type *Ty) { return false; } + bool haveFastSqrt(Type *Ty) const { return false; } - bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; } - unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } + unsigned getFPOpCost(Type *Ty) const { + return TargetTransformInfo::TCC_Basic; + } int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty) const { return 0; } unsigned getIntImmCost(const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { return TTI::TCC_Basic; } unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, - Instruction *Inst = nullptr) { + Instruction *Inst = nullptr) const { return TTI::TCC_Free; } unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const 
APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { return TTI::TCC_Free; } @@ -350,7 +367,7 @@ class TargetTransformInfoImplBase { unsigned getRegisterBitWidth(bool Vector) const { return 32; } - unsigned getMinVectorRegisterBitWidth() { return 128; } + unsigned getMinVectorRegisterBitWidth() const { return 128; } bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } @@ -358,9 +375,8 @@ class TargetTransformInfoImplBase { unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; } - bool - shouldConsiderAddressTypePromotion(const Instruction &I, - bool &AllowPromotionWithoutCommonHeader) { + bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { AllowPromotionWithoutCommonHeader = false; return false; } @@ -399,7 +415,7 @@ class TargetTransformInfoImplBase { unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } bool enableWritePrefetching() const { return false; } - unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } + unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, @@ -408,7 +424,7 @@ class TargetTransformInfoImplBase { TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, - const Instruction *CxtI = nullptr) { + const Instruction *CxtI = nullptr) const { // FIXME: A number of transformation tests seem to require these values // which seems a little odd for how arbitary there are. switch (Opcode) { @@ -427,14 +443,14 @@ class TargetTransformInfoImplBase { } unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index, - VectorType *SubTp) { + VectorType *SubTp) const { return 1; } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, - const Instruction *I) { + const Instruction *I) const { switch (Opcode) { default: break; @@ -470,12 +486,11 @@ class TargetTransformInfoImplBase { } unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, - VectorType *VecTy, unsigned Index) { + VectorType *VecTy, unsigned Index) const { return 1; } - unsigned getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind) { + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const { // A phi would be free, unless we're costing the throughput because it // will require a register. 
if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput) @@ -490,7 +505,8 @@ class TargetTransformInfoImplBase { return 1; } - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { return 1; } @@ -502,26 +518,26 @@ class TargetTransformInfoImplBase { unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { return 1; } unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { + const Instruction *I = nullptr) const { return 1; } unsigned getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, - bool UseMaskForCond, bool UseMaskForGaps) { + bool UseMaskForCond, bool UseMaskForGaps) const { return 1; } unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { switch (ICA.getID()) { default: break; @@ -560,26 +576,32 @@ class TargetTransformInfoImplBase { } unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { return 1; } - unsigned getNumberOfParts(Type *Tp) { return 0; } + unsigned getNumberOfParts(Type *Tp) const { return 0; } unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, - const SCEV *) { + const SCEV *) const { return 0; } unsigned getArithmeticReductionCost(unsigned, VectorType *, bool, - TTI::TargetCostKind) { return 1; } + TTI::TargetCostKind) const { + return 1; + } unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool, - TTI::TargetCostKind) { return 1; } + TTI::TargetCostKind) const { + return 1; + } - unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) { return 0; } + unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) const { + return 0; + } - bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { return false; } @@ -593,7 +615,7 @@ class TargetTransformInfoImplBase { } Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, - Type *ExpectedType) { + Type *ExpectedType) const { return nullptr; } @@ -692,7 +714,7 @@ class TargetTransformInfoImplBase { protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. 
- unsigned minRequiredElementSize(const Value *Val, bool &isSigned) { + unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const { if (isa(Val) || isa(Val)) { const auto *VectorValue = cast(Val); @@ -746,12 +768,12 @@ class TargetTransformInfoImplBase { return Val->getType()->getScalarSizeInBits(); } - bool isStridedAccess(const SCEV *Ptr) { + bool isStridedAccess(const SCEV *Ptr) const { return Ptr && isa(Ptr); } const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, - const SCEV *Ptr) { + const SCEV *Ptr) const { if (!isStridedAccess(Ptr)) return nullptr; const SCEVAddRecExpr *AddRec = cast(Ptr); @@ -759,7 +781,7 @@ class TargetTransformInfoImplBase { } bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, - int64_t MergeDistance) { + int64_t MergeDistance) const { const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); if (!Step) return false; From a9f14cdc6203c05425f8b17228ff368f7fd9ae29 Mon Sep 17 00:00:00 2001 From: David Penry Date: Wed, 23 Dec 2020 14:00:59 +0000 Subject: [PATCH 170/378] [ARM] Add bank conflict hazarding Adds ARMBankConflictHazardRecognizer. This hazard recognizer looks for a few situations where the same base pointer is used and then checks whether the offsets lead to a bank conflict. Two parameters are also added to permit overriding of the target assumptions: arm-data-bank-mask= - Mask of bits which are to be checked for conflicts. If all these bits are equal in the offsets, there is a conflict. arm-assume-itcm-bankconflict= - Assume that there will be bank conflicts on any loads to a constant pool. This hazard recognizer is enabled for Cortex-M7, where the Technical Reference Manual states that there are two DTCM banks banked using bit 2 and one ITCM bank. Differential Revision: https://reviews.llvm.org/D93054 --- llvm/lib/Target/ARM/ARM.td | 4 +- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 25 +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 4 + llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 173 ++++++++++++++++++++ llvm/lib/Target/ARM/ARMHazardRecognizer.h | 32 ++++ llvm/lib/Target/ARM/ARMSubtarget.cpp | 1 + llvm/lib/Target/ARM/ARMSubtarget.h | 2 + llvm/test/CodeGen/Thumb2/schedm7-hazard.ll | 38 +++++ 8 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Thumb2/schedm7-hazard.ll diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 8111346c74f6c..5d626e7d8e5a2 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -660,7 +660,8 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", "Cortex-M3 ARM processors", []>; - +def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7", + "Cortex-M7 ARM processors", []>; //===----------------------------------------------------------------------===// // ARM Helper classes. 
@@ -1191,6 +1192,7 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, FeatureHasNoBranchPredictor]>; def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em, + ProcM7, FeatureFPARMv8_D16, FeatureUseMISched]>; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index def6312769502..563f2d38edf02 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -134,6 +134,31 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } +// Called during: +// - pre-RA scheduling +// - post-RA scheduling when FeatureUseMISched is set +ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { + MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); + + // We would like to restrict this hazard recognizer to only + // post-RA scheduling; we can tell that we're post-RA because we don't + // track VRegLiveness. + // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM + // banks banked on bit 2. Assume that TCMs are in use. + if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness()) + MHR->AddHazardRecognizer( + std::make_unique(DAG, 0x4, true)); + + // Not inserting ARMHazardRecognizerFPMLx because that would change + // legacy behavior + + auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); + MHR->AddHazardRecognizer(std::unique_ptr(BHR)); + return MHR; +} + +// Called during post-RA scheduling when FeatureUseMISched is not set ScheduleHazardRecognizer *ARMBaseInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 9b6572848ebef..deb008025b1d5 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -131,6 +131,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override; + ScheduleHazardRecognizer * + CreateTargetMIHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAGMI *DAG) const override; + ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 3cbc8da863c3d..f083fa6662e93 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -10,11 +10,19 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMSubtarget.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +static cl::opt DataBankMask("arm-data-bank-mask", cl::init(-1), + cl::Hidden); +static cl::opt AssumeITCMConflict("arm-assume-itcm-bankconflict", + cl::init(false), cl::Hidden); + static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, const TargetRegisterInfo &TRI) { // FIXME: Detect integer instructions properly. 
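The next hunk adds the conflict check itself. A standalone sketch of that predicate (not part of the patch; the 0x4 default mask mirrors the Cortex-M7 setting above, where the two DTCM banks are selected by bit 2 of the address):

```cpp
#include <cassert>
#include <cstdint>

// Two load offsets hit the same bank when their masked bank-select bits are
// equal, i.e. ((O0 ^ O1) & DataMask) == 0.
static bool banksConflict(int64_t O0, int64_t O1, int64_t DataMask = 0x4) {
  return ((O0 ^ O1) & DataMask) == 0;
}

int main() {
  assert(banksConflict(0, 8));  // bit 2 equal in both -> same bank -> hazard
  assert(!banksConflict(0, 4)); // bit 2 differs -> different banks -> no hazard
  return 0;
}
```

The new schedm7-hazard.ll test at the end of this patch drives the same logic through llc: with the default mask the two loads at offsets 0 and 8 are kept in separate cycles, while the `-arm-data-bank-mask=-1` run line (NOBANK prefix) only treats identical offsets as conflicting, so both loads schedule back to back.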
@@ -93,3 +101,168 @@ void ARMHazardRecognizerFPMLx::AdvanceCycle() { void ARMHazardRecognizerFPMLx::RecedeCycle() { llvm_unreachable("reverse ARM hazard checking unsupported"); } + +///////// Bank conflicts handled as hazards ////////////// + +static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, + int64_t &Offset) { + + uint64_t TSFlags = MI.getDesc().TSFlags; + unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); + unsigned IndexMode = + (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; + + // Address mode tells us what we want to know about operands for T2 + // instructions (but not size). It tells us size (but not about operands) + // for T1 instructions. + switch (AddrMode) { + default: + return false; + case ARMII::AddrModeT2_i8: + // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8, + // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8, + // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8, + // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8, + // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8 + BaseOp = &MI.getOperand(1); + Offset = (IndexMode == ARMII::IndexModePost) + ? 0 + : (IndexMode == ARMII::IndexModePre || + IndexMode == ARMII::IndexModeUpd) + ? MI.getOperand(3).getImm() + : MI.getOperand(2).getImm(); + return true; + case ARMII::AddrModeT2_i12: + // t2LDRBi12, t2LDRHi12 + // t2LDRSBi12, t2LDRSHi12 + // t2LDRi12 + BaseOp = &MI.getOperand(1); + Offset = MI.getOperand(2).getImm(); + return true; + case ARMII::AddrModeT2_i8s4: + // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8 + BaseOp = &MI.getOperand(2); + Offset = (IndexMode == ARMII::IndexModePost) + ? 0 + : (IndexMode == ARMII::IndexModePre || + IndexMode == ARMII::IndexModeUpd) + ? MI.getOperand(4).getImm() + : MI.getOperand(3).getImm(); + return true; + case ARMII::AddrModeT1_1: + // tLDRBi, tLDRBr (watch out!), TLDRSB + case ARMII::AddrModeT1_2: + // tLDRHi, tLDRHr (watch out!), TLDRSH + case ARMII::AddrModeT1_4: + // tLDRi, tLDRr (watch out!) + BaseOp = &MI.getOperand(1); + Offset = MI.getOperand(2).isImm() ? MI.getOperand(2).getImm() : 0; + return MI.getOperand(2).isImm(); + } + return false; +} + +ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer( + const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict) + : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()), + DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask) + : CPUBankMask), + AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences() + ? AssumeITCMConflict + : CPUAssumeITCMConflict) { + MaxLookAhead = 1; +} + +ScheduleHazardRecognizer::HazardType +ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) { + return (((O0 ^ O1) & DataMask) != 0) ? 
NoHazard : Hazard; +} + +ScheduleHazardRecognizer::HazardType +ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + MachineInstr &L0 = *SU->getInstr(); + if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1) + return NoHazard; + + auto MO0 = *L0.memoperands().begin(); + auto BaseVal0 = MO0->getValue(); + auto BasePseudoVal0 = MO0->getPseudoValue(); + int64_t Offset0 = 0; + + if (MO0->getSize() > 4) + return NoHazard; + + bool SPvalid = false; + const MachineOperand *SP = nullptr; + int64_t SPOffset0 = 0; + + for (auto L1 : Accesses) { + auto MO1 = *L1->memoperands().begin(); + auto BaseVal1 = MO1->getValue(); + auto BasePseudoVal1 = MO1->getPseudoValue(); + int64_t Offset1 = 0; + + // Pointers to the same object + if (BaseVal0 && BaseVal1) { + const Value *Ptr0, *Ptr1; + Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true); + Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true); + if (Ptr0 == Ptr1 && Ptr0) + return CheckOffsets(Offset0, Offset1); + } + + if (BasePseudoVal0 && BasePseudoVal1 && + BasePseudoVal0->kind() == BasePseudoVal1->kind() && + BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) { + // Spills/fills + auto FS0 = cast(BasePseudoVal0); + auto FS1 = cast(BasePseudoVal1); + Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex()); + Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex()); + return CheckOffsets(Offset0, Offset1); + } + + // Constant pools (likely in ITCM) + if (BasePseudoVal0 && BasePseudoVal1 && + BasePseudoVal0->kind() == BasePseudoVal1->kind() && + BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict) + return Hazard; + + // Is this a stack pointer-relative access? We could in general try to + // use "is this the same register and is it unchanged?", but the + // memory operand tracking is highly likely to have already found that. + // What we're after here is bank conflicts between different objects in + // the stack frame. 
+ if (!SPvalid) { // set up SP + if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP) + SP = nullptr; + SPvalid = true; + } + if (SP) { + int64_t SPOffset1; + const MachineOperand *SP1; + if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP) + return CheckOffsets(SPOffset0, SPOffset1); + } + } + + return NoHazard; +} + +void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); } + +void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) { + MachineInstr &MI = *SU->getInstr(); + if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1) + return; + + auto MO = *MI.memoperands().begin(); + uint64_t Size1 = MO->getSize(); + if (Size1 > 4) + return; + Accesses.push_back(&MI); +} + +void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); } + +void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); } diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/llvm/lib/Target/ARM/ARMHazardRecognizer.h index 6d29e0c82063f..c1f1bcd0a629b 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.h +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.h @@ -13,10 +13,21 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H #define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H +#include "ARMBaseInstrInfo.h" +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/DataTypes.h" +#include +#include namespace llvm { +class DataLayout; +class MachineFunction; +class MachineInstr; +class ScheduleDAG; + // Hazards related to FP MLx instructions class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer { MachineInstr *LastMI = nullptr; @@ -32,6 +43,27 @@ class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer { void RecedeCycle() override; }; +// Hazards related to bank conflicts +class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer { + SmallVector Accesses; + const MachineFunction &MF; + const DataLayout &DL; + int64_t DataMask; + bool AssumeITCMBankConflict; + +public: + ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM, + bool ABC); + HazardType getHazardType(SUnit *SU, int Stalls) override; + void Reset() override; + void EmitInstruction(SUnit *SU) override; + void AdvanceCycle() override; + void RecedeCycle() override; + +private: + inline HazardType CheckOffsets(unsigned O0, unsigned O1); +}; + } // end namespace llvm #endif diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index d90346df67da1..e2a3d302b1038 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -299,6 +299,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexR5: case CortexR7: case CortexM3: + case CortexM7: case CortexR52: case CortexX1: break; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 778d3ba22a2ff..ac7248ac29c98 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -66,6 +66,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { CortexA8, CortexA9, CortexM3, + CortexM7, CortexR4, CortexR4F, CortexR5, @@ -625,6 +626,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return ARMProcFamily == CortexM3; } + bool isCortexM7() const { return ARMProcFamily == CortexM7; } bool 
isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); } bool isCortexR5() const { return ARMProcFamily == CortexR5; } bool isKrait() const { return ARMProcFamily == Krait; } diff --git a/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll new file mode 100644 index 0000000000000..9572300d8e228 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -arm-data-bank-mask=-1 | FileCheck %s --check-prefix=NOBANK + +; This tests the cortex-m7 bank conflict hazard recognizer. +; Normally both loads would be scheduled early (both in the first cycle) due to +; their latency. But will bank conflict to TCM so are scheduled in different +; cycles. + +define i32 @test(i32* %x0, i32 %y, i32 %z) { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldr r3, [r0] +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: ldr r0, [r0, #8] +; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: adds r1, #1 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: bx lr +; NOBANK-LABEL: test: +; NOBANK: @ %bb.0: @ %entry +; NOBANK-NEXT: ldr r3, [r0] +; NOBANK-NEXT: ldr r0, [r0, #8] +; NOBANK-NEXT: subs r1, r3, r1 +; NOBANK-NEXT: subs r1, r1, r2 +; NOBANK-NEXT: adds r1, #1 +; NOBANK-NEXT: muls r0, r1, r0 +; NOBANK-NEXT: bx lr +entry: + %0 = load i32, i32* %x0, align 4 + %mul3 = add nsw i32 %0, 1 + %mul = sub nsw i32 %mul3, %y + %sub = sub nsw i32 %mul, %z + %arrayidx1 = getelementptr inbounds i32, i32* %x0, i32 2 + %1 = load i32, i32* %arrayidx1, align 4 + %mul2 = mul nsw i32 %sub, %1 + ret i32 %mul2 +} From 6e603464959d43e0e430d0f8ac5522b073d68ba1 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 23 Dec 2020 09:16:55 -0500 Subject: [PATCH 171/378] [OpenMP] Fixing Typo in Documentation --- openmp/docs/design/Runtimes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 39ed256c48569..2e5f2bfe03844 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -67,7 +67,7 @@ will be completely disabled. LIBOMPTARGET_INFO """"""""""""""""" -``LIBOMPTARGET_INFO`` allows the user to request different types runtime +``LIBOMPTARGET_INFO`` allows the user to request different types of runtime information from ``libomptarget``. ``LIBOMPTARGET_INFO`` uses a 32-bit field to enable or disable different types of information. This includes information about data-mappings and kernel execution. 
It is recommended to build your From 5426b2f9ed9f6f3a3e1d6452325f7a49a5d08ec4 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Wed, 23 Dec 2020 14:41:06 +0000 Subject: [PATCH 172/378] [clang-format] PR48535 clang-format Incorrectly Removes Space After C Style Cast When Type Is Not a Pointer https://bugs.llvm.org/show_bug.cgi?id=48535 using `SpaceAfterCStyleCast: true` ``` size_t idx = (size_t) a; size_t idx = (size_t) (a - 1); ``` is formatted as: ``` size_t idx = (size_t) a; size_t idx = (size_t)(a - 1); ``` This revision aims to improve that by improving the function which tries to identify a CastRParen Reviewed By: curdeius Differential Revision: https://reviews.llvm.org/D93626 --- clang/lib/Format/TokenAnnotator.cpp | 7 +++++++ clang/unittests/Format/FormatTest.cpp | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 733ca1e0e8526..a0cb86cfcebfa 100755 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1915,6 +1915,13 @@ class AnnotatingParser { if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) return true; + if (Tok.Next->is(tok::l_paren) && + !(Tok.Previous && Tok.Previous->is(tok::identifier) && + Tok.Previous->Previous && + Tok.Previous->Previous->isOneOf(tok::arrowstar, tok::arrow, + tok::star))) + return true; + if (!Tok.Next->Next) return false; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d2aed304f213a..ee757c14eafb7 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -11989,6 +11989,20 @@ TEST_F(FormatTest, ConfigurableSpacesInParentheses) { " do_something((int) i);\n" "} while (something( ));", Spaces); + + verifyFormat("size_t idx = (size_t) (ptr - ((char *) file));", Spaces); + verifyFormat("size_t idx = (size_t) a;", Spaces); + verifyFormat("size_t idx = (size_t) (a - 1);", Spaces); + verifyFormat("size_t idx = (a->*foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (a->foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (*foo)(a - 1);", Spaces); + Spaces.SpaceAfterCStyleCast = false; + verifyFormat("size_t idx = (size_t)(ptr - ((char *)file));", Spaces); + verifyFormat("size_t idx = (size_t)a;", Spaces); + verifyFormat("size_t idx = (size_t)(a - 1);", Spaces); + verifyFormat("size_t idx = (a->*foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (a->foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (*foo)(a - 1);", Spaces); } TEST_F(FormatTest, ConfigurableSpacesInSquareBrackets) { From 031743cb5b3c6c2df85a67d8533ef72a95e76cdc Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Wed, 23 Dec 2020 14:44:31 +0000 Subject: [PATCH 173/378] [clang-format] PR48539 ReflowComments breaks Qt translation comments https://bugs.llvm.org/show_bug.cgi?id=48539 Add support for Qt Translator Comments to reflow When reflown and a part of the comments are added on a new line, it should repeat these extra characters as part of the comment token. 
Reviewed By: curdeius, HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D93490 --- clang/lib/Format/BreakableToken.cpp | 4 ++-- clang/unittests/Format/FormatTestComments.cpp | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index ea5cc31af07a7..6a240fdec8b99 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -41,8 +41,8 @@ static bool IsBlank(char C) { static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style) { - static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//", - "//!"}; + static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", + "//", "//!", "//:"}; static const char *const KnownTextProtoPrefixes[] = {"//", "#", "##", "###", "####"}; ArrayRef KnownPrefixes(KnownCStylePrefixes); diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp index 27dfe71367b30..457e7321ec759 100644 --- a/clang/unittests/Format/FormatTestComments.cpp +++ b/clang/unittests/Format/FormatTestComments.cpp @@ -702,6 +702,12 @@ TEST_F(FormatTestComments, SplitsLongCxxComments) { " // long 1 2 3 4 5 6\n" "}", getLLVMStyleWithColumns(20))); + + EXPECT_EQ("//: A comment that\n" + "//: doesn't fit on\n" + "//: one line", + format("//: A comment that doesn't fit on one line", + getLLVMStyleWithColumns(20))); } TEST_F(FormatTestComments, PreservesHangingIndentInCxxComments) { From 1d0dc9be6d72915d2bb632c7a46645289405dcbf Mon Sep 17 00:00:00 2001 From: ergawy Date: Wed, 23 Dec 2020 15:32:31 +0100 Subject: [PATCH 174/378] [MLIR][SPIRV] Add rewrite pattern to convert select+cmp into GLSL clamp. Adds rewrite patterns to convert select+cmp instructions into clamp instructions whenever possible. Support is added to convert: - FOrdLessThan, FOrdLessThanEqual to GLSLFClampOp. - SLessThan, SLessThanEqual to GLSLSClampOp. - ULessThan, ULessThanEqual to GLSLUClampOp. Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D93618 --- .../SPIRV/IR/SPIRVGLSLCanonicalization.h | 31 +++++ mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt | 1 + .../Dialect/SPIRV/IR/SPIRVCanonicalization.td | 30 +++++ .../SPIRV/IR/SPIRVGLSLCanonicalization.cpp | 35 ++++++ .../SPIRV/Transforms/glsl_canonicalize.mlir | 113 ++++++++++++++++++ mlir/test/lib/Dialect/SPIRV/CMakeLists.txt | 1 + .../SPIRV/TestGLSLCanonicalization.cpp | 39 ++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 8 files changed, 252 insertions(+) create mode 100644 mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.h create mode 100644 mlir/lib/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.cpp create mode 100644 mlir/test/Dialect/SPIRV/Transforms/glsl_canonicalize.mlir create mode 100644 mlir/test/lib/Dialect/SPIRV/TestGLSLCanonicalization.cpp diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.h b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.h new file mode 100644 index 0000000000000..1921dbbcfc70e --- /dev/null +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.h @@ -0,0 +1,31 @@ +//===- SPIRVGLSLCanonicalization.h - GLSL-specific patterns -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares a function to register SPIR-V GLSL-specific +// canonicalization patterns. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPIRV_IR_SPIRVGLSLCANONICALIZATION_H_ +#define MLIR_DIALECT_SPIRV_IR_SPIRVGLSLCANONICALIZATION_H_ + +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" + +//===----------------------------------------------------------------------===// +// GLSL canonicalization patterns +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace spirv { +void populateSPIRVGLSLCanonicalizationPatterns( + mlir::OwningRewritePatternList &results, mlir::MLIRContext *context); +} // namespace spirv +} // namespace mlir + +#endif // MLIR_DIALECT_SPIRV_IR_SPIRVGLSLCANONICALIZATION_H_ diff --git a/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt b/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt index dbf62425878b6..42c0047168b91 100644 --- a/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt @@ -5,6 +5,7 @@ add_public_tablegen_target(MLIRSPIRVCanonicalizationIncGen) add_mlir_dialect_library(MLIRSPIRV SPIRVAttributes.cpp SPIRVCanonicalization.cpp + SPIRVGLSLCanonicalization.cpp SPIRVDialect.cpp SPIRVEnums.cpp SPIRVOps.cpp diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td index 125e973608654..6e0ee4488fa14 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td @@ -38,3 +38,33 @@ def ConvertLogicalNotOfLogicalEqual : Pat< def ConvertLogicalNotOfLogicalNotEqual : Pat< (SPV_LogicalNotOp (SPV_LogicalNotEqualOp $lhs, $rhs)), (SPV_LogicalEqualOp $lhs, $rhs)>; + +//===----------------------------------------------------------------------===// +// Re-write spv.Select + spv. to a suitable variant of +// spv. +//===----------------------------------------------------------------------===// + +def ValuesAreEqual : Constraint>; + +foreach CmpClampPair = [ + [SPV_FOrdLessThanOp, SPV_GLSLFClampOp], + [SPV_FOrdLessThanEqualOp, SPV_GLSLFClampOp], + [SPV_SLessThanOp, SPV_GLSLSClampOp], + [SPV_SLessThanEqualOp, SPV_GLSLSClampOp], + [SPV_ULessThanOp, SPV_GLSLUClampOp], + [SPV_ULessThanEqualOp, SPV_GLSLUClampOp]] in { +def ConvertComparisonIntoClamp#CmpClampPair[0] : Pat< + (SPV_SelectOp + (CmpClampPair[0] + (SPV_SelectOp:$middle0 + (CmpClampPair[0] $min, $input), + $input, + $min + ), + $max + ), + $middle1, + $max), + (CmpClampPair[1] $input, $min, $max), + [(ValuesAreEqual $middle0, $middle1)]>; +} diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.cpp new file mode 100644 index 0000000000000..0aa413941efd2 --- /dev/null +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.cpp @@ -0,0 +1,35 @@ +//===- SPIRVGLSLCanonicalization.cpp - SPIR-V GLSL canonicalization patterns =// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the canonicalization patterns for SPIR-V GLSL-specific ops. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.h" + +#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" + +using namespace mlir; + +namespace { +#include "SPIRVCanonicalization.inc" +} // end anonymous namespace + +namespace mlir { +namespace spirv { +void populateSPIRVGLSLCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} +} // namespace spirv +} // namespace mlir diff --git a/mlir/test/Dialect/SPIRV/Transforms/glsl_canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/glsl_canonicalize.mlir new file mode 100644 index 0000000000000..90e9b85b9035b --- /dev/null +++ b/mlir/test/Dialect/SPIRV/Transforms/glsl_canonicalize.mlir @@ -0,0 +1,113 @@ +// RUN: mlir-opt -test-spirv-glsl-canonicalization -split-input-file -verify-diagnostics %s | FileCheck %s + +// CHECK: func @clamp_fordlessthan(%[[INPUT:.*]]: f32) +func @clamp_fordlessthan(%input: f32) -> f32 { + // CHECK: %[[MIN:.*]] = spv.constant + %min = spv.constant 0.5 : f32 + // CHECK: %[[MAX:.*]] = spv.constant + %max = spv.constant 1.0 : f32 + + // CHECK: [[RES:%.*]] = spv.GLSL.FClamp %[[INPUT]], %[[MIN]], %[[MAX]] + %0 = spv.FOrdLessThan %min, %input : f32 + %mid = spv.Select %0, %input, %min : i1, f32 + %1 = spv.FOrdLessThan %mid, %max : f32 + %2 = spv.Select %1, %mid, %max : i1, f32 + + // CHECK-NEXT: spv.ReturnValue [[RES]] + spv.ReturnValue %2 : f32 +} + +// ----- + +// CHECK: func @clamp_fordlessthanequal(%[[INPUT:.*]]: f32) +func @clamp_fordlessthanequal(%input: f32) -> f32 { + // CHECK: %[[MIN:.*]] = spv.constant + %min = spv.constant 0.5 : f32 + // CHECK: %[[MAX:.*]] = spv.constant + %max = spv.constant 1.0 : f32 + + // CHECK: [[RES:%.*]] = spv.GLSL.FClamp %[[INPUT]], %[[MIN]], %[[MAX]] + %0 = spv.FOrdLessThanEqual %min, %input : f32 + %mid = spv.Select %0, %input, %min : i1, f32 + %1 = spv.FOrdLessThanEqual %mid, %max : f32 + %2 = spv.Select %1, %mid, %max : i1, f32 + + // CHECK-NEXT: spv.ReturnValue [[RES]] + spv.ReturnValue %2 : f32 +} + +// ----- + +// CHECK: func @clamp_slessthan(%[[INPUT:.*]]: si32) +func @clamp_slessthan(%input: si32) -> si32 { + // CHECK: %[[MIN:.*]] = spv.constant + %min = spv.constant 0 : si32 + // CHECK: %[[MAX:.*]] = spv.constant + %max = spv.constant 10 : si32 + + // CHECK: [[RES:%.*]] = spv.GLSL.SClamp %[[INPUT]], %[[MIN]], %[[MAX]] + %0 = spv.SLessThan %min, %input : si32 + %mid = spv.Select %0, %input, %min : i1, si32 + %1 = spv.SLessThan %mid, %max : si32 + %2 = spv.Select %1, %mid, %max : i1, si32 + + // CHECK-NEXT: spv.ReturnValue [[RES]] + spv.ReturnValue %2 : si32 +} + +// ----- + +// CHECK: func @clamp_slessthanequal(%[[INPUT:.*]]: si32) +func @clamp_slessthanequal(%input: si32) -> si32 { + // CHECK: %[[MIN:.*]] = spv.constant + %min = spv.constant 0 : si32 + // CHECK: %[[MAX:.*]] = spv.constant + %max = spv.constant 10 : si32 + + // CHECK: [[RES:%.*]] = spv.GLSL.SClamp %[[INPUT]], %[[MIN]], %[[MAX]] + %0 = spv.SLessThanEqual %min, %input : si32 + %mid = spv.Select %0, %input, %min : i1, si32 + %1 = spv.SLessThanEqual %mid, %max : si32 + %2 = spv.Select %1, %mid, %max : i1, si32 + + // CHECK-NEXT: spv.ReturnValue [[RES]] + spv.ReturnValue %2 : si32 +} + +// ----- + +// CHECK: func @clamp_ulessthan(%[[INPUT:.*]]: i32) +func @clamp_ulessthan(%input: i32) -> i32 { + // CHECK: %[[MIN:.*]] = spv.constant + %min = spv.constant 0 : i32 + // CHECK: %[[MAX:.*]] = spv.constant + %max = spv.constant 10 : i32 + + // CHECK: [[RES:%.*]] = 
spv.GLSL.UClamp %[[INPUT]], %[[MIN]], %[[MAX]] + %0 = spv.ULessThan %min, %input : i32 + %mid = spv.Select %0, %input, %min : i1, i32 + %1 = spv.ULessThan %mid, %max : i32 + %2 = spv.Select %1, %mid, %max : i1, i32 + + // CHECK-NEXT: spv.ReturnValue [[RES]] + spv.ReturnValue %2 : i32 +} + +// ----- + +// CHECK: func @clamp_ulessthanequal(%[[INPUT:.*]]: i32) +func @clamp_ulessthanequal(%input: i32) -> i32 { + // CHECK: %[[MIN:.*]] = spv.constant + %min = spv.constant 0 : i32 + // CHECK: %[[MAX:.*]] = spv.constant + %max = spv.constant 10 : i32 + + // CHECK: [[RES:%.*]] = spv.GLSL.UClamp %[[INPUT]], %[[MIN]], %[[MAX]] + %0 = spv.ULessThanEqual %min, %input : i32 + %mid = spv.Select %0, %input, %min : i1, i32 + %1 = spv.ULessThanEqual %mid, %max : i32 + %2 = spv.Select %1, %mid, %max : i1, i32 + + // CHECK-NEXT: spv.ReturnValue [[RES]] + spv.ReturnValue %2 : i32 +} diff --git a/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt b/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt index edcbf4ebf1de0..856e5eb7f40da 100644 --- a/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt +++ b/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt @@ -2,6 +2,7 @@ add_mlir_library(MLIRSPIRVTestPasses TestAvailability.cpp TestEntryPointAbi.cpp + TestGLSLCanonicalization.cpp TestModuleCombiner.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Dialect/SPIRV/TestGLSLCanonicalization.cpp b/mlir/test/lib/Dialect/SPIRV/TestGLSLCanonicalization.cpp new file mode 100644 index 0000000000000..158601fbc17ab --- /dev/null +++ b/mlir/test/lib/Dialect/SPIRV/TestGLSLCanonicalization.cpp @@ -0,0 +1,39 @@ +//===- TestGLSLCanonicalization.cpp - Pass to test GLSL-specific pattterns ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.h" +#include "mlir/Dialect/SPIRV/IR/SPIRVModule.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +using namespace mlir; + +namespace { +class TestGLSLCanonicalizationPass + : public PassWrapper> { +public: + TestGLSLCanonicalizationPass() = default; + TestGLSLCanonicalizationPass(const TestGLSLCanonicalizationPass &) {} + void runOnOperation() override; +}; +} // namespace + +void TestGLSLCanonicalizationPass::runOnOperation() { + OwningRewritePatternList patterns; + spirv::populateSPIRVGLSLCanonicalizationPatterns(patterns, &getContext()); + applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); +} + +namespace mlir { +void registerTestSpirvGLSLCanonicalizationPass() { + PassRegistration registration( + "test-spirv-glsl-canonicalization", + "Tests SPIR-V canonicalization patterns for GLSL extension."); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 67aa855092efd..dc68f8f4d7789 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -47,6 +47,7 @@ void registerTestPrintDefUsePass(); void registerTestPrintNestingPass(); void registerTestReducer(); void registerTestSpirvEntryPointABIPass(); +void registerTestSpirvGLSLCanonicalizationPass(); void registerTestSpirvModuleCombinerPass(); void registerTestTraitsPass(); void registerTosaTestQuantUtilAPIPass(); @@ -115,6 +116,7 @@ void registerTestPasses() { registerTestPrintNestingPass(); registerTestReducer(); registerTestSpirvEntryPointABIPass(); + registerTestSpirvGLSLCanonicalizationPass(); registerTestSpirvModuleCombinerPass(); registerTestTraitsPass(); registerVectorizerTestPass(); From 2522fa053b62520ae48b4b27117ca003a2c878ab Mon Sep 17 00:00:00 2001 From: Aleksandr Platonov Date: Wed, 23 Dec 2020 17:04:54 +0300 Subject: [PATCH 175/378] [clangd] Do not take stale definition from the static index. This is follow up to D93393. Without this patch clangd takes the symbol definition from the static index if this definition was removed from the dynamic index. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D93683 --- clang-tools-extra/clangd/index/Merge.cpp | 6 ++++ .../clangd/unittests/IndexTests.cpp | 34 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/clang-tools-extra/clangd/index/Merge.cpp b/clang-tools-extra/clangd/index/Merge.cpp index 97babacf2b38e..f66f47499624a 100644 --- a/clang-tools-extra/clangd/index/Merge.cpp +++ b/clang-tools-extra/clangd/index/Merge.cpp @@ -76,7 +76,13 @@ void MergedIndex::lookup( Dynamic->lookup(Req, [&](const Symbol &S) { B.insert(S); }); auto RemainingIDs = Req.IDs; + auto DynamicContainsFile = Dynamic->indexedFiles(); Static->lookup(Req, [&](const Symbol &S) { + // We expect the definition to see the canonical declaration, so it seems + // to be enough to check only the definition if it exists. + if (DynamicContainsFile(S.Definition ? 
S.Definition.FileURI + : S.CanonicalDeclaration.FileURI)) + return; const Symbol *Sym = B.find(S.ID); RemainingIDs.erase(S.ID); if (!Sym) diff --git a/clang-tools-extra/clangd/unittests/IndexTests.cpp b/clang-tools-extra/clangd/unittests/IndexTests.cpp index 8efc637d1250b..ce4845e3e1446 100644 --- a/clang-tools-extra/clangd/unittests/IndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/IndexTests.cpp @@ -290,6 +290,40 @@ TEST(MergeIndexTest, Lookup) { EXPECT_THAT(lookup(M, {}), UnorderedElementsAre()); } +TEST(MergeIndexTest, LookupRemovedDefinition) { + FileIndex DynamicIndex, StaticIndex; + MergedIndex Merge(&DynamicIndex, &StaticIndex); + + const char *HeaderCode = "class Foo;"; + auto HeaderSymbols = TestTU::withHeaderCode(HeaderCode).headerSymbols(); + auto Foo = findSymbol(HeaderSymbols, "Foo"); + + // Build static index for test.cc with Foo definition + TestTU Test; + Test.HeaderCode = HeaderCode; + Test.Code = "class Foo {};"; + Test.Filename = "test.cc"; + auto AST = Test.build(); + StaticIndex.updateMain(testPath(Test.Filename), AST); + + // Remove Foo definition from test.cc, i.e. build dynamic index for test.cc + // without Foo definition. + Test.Code = "class Foo;"; + AST = Test.build(); + DynamicIndex.updateMain(testPath(Test.Filename), AST); + + // Merged index should not return the symbol definition if this definition + // location is inside a file from the dynamic index. + LookupRequest LookupReq; + LookupReq.IDs = {Foo.ID}; + unsigned SymbolCounter = 0; + Merge.lookup(LookupReq, [&](const Symbol &Sym) { + ++SymbolCounter; + EXPECT_FALSE(Sym.Definition); + }); + EXPECT_EQ(SymbolCounter, 1u); +} + TEST(MergeIndexTest, FuzzyFind) { auto I = MemIndex::build(generateSymbols({"ns::A", "ns::B"}), RefSlab(), RelationSlab()), From 9fb074e7bb12ba20ca5ca628a11d4cb30e7c87cc Mon Sep 17 00:00:00 2001 From: Evgeniy Brevnov Date: Thu, 18 Jun 2020 16:20:55 +0700 Subject: [PATCH 176/378] [BPI] Improve static heuristics for "cold" paths. Current approach doesn't work well in cases when multiple paths are predicted to be "cold". By "cold" paths I mean those containing "unreachable" instruction, call marked with 'cold' attribute and 'unwind' handler of 'invoke' instruction. The issue is that heuristics are applied one by one until the first match and essentially ignores relative hotness/coldness of other paths. New approach unifies processing of "cold" paths by assigning predefined absolute weight to each block estimated to be "cold". Then we propagate these weights up/down IR similarly to existing approach. And finally set up edge probabilities based on estimated block weights. One important difference is how we propagate weight up. Existing approach propagates the same weight to all blocks that are post-dominated by a block with some "known" weight. This is useless at least because it always gives 50\50 distribution which is assumed by default anyway. Worse, it causes the algorithm to skip further heuristics and can miss setting more accurate probability. New algorithm propagates the weight up only to the blocks that dominates and post-dominated by a block with some "known" weight. In other words, those blocks that are either always executed or not executed together. In addition new approach processes loops in an uniform way as well. Essentially loop exit edges are estimated as "cold" paths relative to back edges and should be considered uniformly with other coldness/hotness markers. 
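As a rough C-level sketch of the BB1/BB2/BB3 worked example added to the BranchProbabilityInfo.h comment below (the function name `f` and the cold helper `bail` are invented for illustration and are not part of this patch):

```
extern void bail() __attribute__((cold)); // hypothetical helper marked 'cold'

int f(int x) {
  if (x < 0) {                 // BB1: branches to the cold subtree (BB2) or BB3
    if (x < -100)              // BB2: branches to UnreachBB or ColdBB
      __builtin_unreachable(); // UnreachBB
    bail();                    // ColdBB: contains the call marked 'cold'
  }
  return x;                    // BB3: default-weight fall-through
}
```

With the weights defined in the patch (UNREACHABLE = 0, COLD = 0xffff, DEFAULT = 0xfffff), the inner branch gets an almost 100%/0% split in favour of the bail() path, and the outer branch is estimated at roughly 5.9%/94.1%, because the COLD weight of the whole `x < 0` subtree is weighed against the DEFAULT weight of the fall-through path rather than being decided by the first matching heuristic in isolation.
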
Reviewed By: yrouban Differential Revision: https://reviews.llvm.org/D79485 --- .../llvm/Analysis/BranchProbabilityInfo.h | 153 ++++- .../llvm/Analysis/LazyBranchProbabilityInfo.h | 2 +- llvm/lib/Analysis/BranchProbabilityInfo.cpp | 645 ++++++++++-------- .../Analysis/OptimizationRemarkEmitter.cpp | 2 +- .../lib/Transforms/Scalar/LoopPredication.cpp | 2 +- .../BlockFrequencyInfo/redundant_edges.ll | 2 +- .../Analysis/BranchProbabilityInfo/basic.ll | 40 +- .../BranchProbabilityInfo/deopt-intrinsic.ll | 4 +- .../BranchProbabilityInfo/deopt-invoke.ll | 107 +++ .../Analysis/BranchProbabilityInfo/loop.ll | 209 +++++- .../BranchProbabilityInfo/noreturn.ll | 35 +- .../BranchProbabilityInfo/unreachable.ll | 154 +++++ .../irtranslator-invoke-probabilities.ll | 2 +- .../transform-block-with-return-to-epilog.ll | 4 +- .../CodeGen/ARM/ifcvt-branch-weight-bug.ll | 2 +- llvm/test/CodeGen/ARM/sub-cmp-peephole.ll | 2 +- .../ARM/v8m.base-jumptable_alignment.ll | 22 +- llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll | 182 +++-- llvm/test/CodeGen/PowerPC/pr36292.ll | 5 +- llvm/test/CodeGen/PowerPC/sms-cpy-1.ll | 1 + llvm/test/CodeGen/SPARC/missinglabel.ll | 2 +- llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir | 4 +- .../WebAssembly/switch-unreachable-default.ll | 4 +- .../CodeGen/X86/2008-04-17-CoalescerBug.ll | 19 +- llvm/test/CodeGen/X86/block-placement.ll | 4 +- .../X86/misched_phys_reg_assign_order.ll | 6 +- llvm/test/CodeGen/X86/pr27501.ll | 10 +- llvm/test/CodeGen/X86/pr37916.ll | 2 +- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 117 ++-- .../Transforms/JumpThreading/thread-prob-3.ll | 4 +- 30 files changed, 1180 insertions(+), 567 deletions(-) create mode 100644 llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll create mode 100644 llvm/test/Analysis/BranchProbabilityInfo/unreachable.ll diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 64c3da80f6ea5..6a286236a80e8 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace llvm { @@ -35,6 +36,7 @@ class Function; class Loop; class LoopInfo; class raw_ostream; +class DominatorTree; class PostDominatorTree; class TargetLibraryInfo; class Value; @@ -51,20 +53,79 @@ class Value; /// identify an edge, since we can have multiple edges from Src to Dst. /// As an example, we can have a switch which jumps to Dst with value 0 and /// value 10. +/// +/// Process of computing branch probabilities can be logically viewed as three +/// step process: +/// +/// First, if there is a profile information associated with the branch then +/// it is trivially translated to branch probabilities. There is one exception +/// from this rule though. Probabilities for edges leading to "unreachable" +/// blocks (blocks with the estimated weight not greater than +/// UNREACHABLE_WEIGHT) are evaluated according to static estimation and +/// override profile information. If no branch probabilities were calculated +/// on this step then take the next one. +/// +/// Second, estimate absolute execution weights for each block based on +/// statically known information. Roots of such information are "cold", +/// "unreachable", "noreturn" and "unwind" blocks. Those blocks get their +/// weights set to BlockExecWeight::COLD, BlockExecWeight::UNREACHABLE, +/// BlockExecWeight::NORETURN and BlockExecWeight::UNWIND respectively. 
Then the +/// weights are propagated to the other blocks up the domination line. In +/// addition, if all successors have estimated weights set then maximum of these +/// weights assigned to the block itself (while this is not ideal heuristic in +/// theory it's simple and works reasonably well in most cases) and the process +/// repeats. Once the process of weights propagation converges branch +/// probabilities are set for all such branches that have at least one successor +/// with the weight set. Default execution weight (BlockExecWeight::DEFAULT) is +/// used for any successors which doesn't have its weight set. For loop back +/// branches we use their weights scaled by loop trip count equal to +/// 'LBH_TAKEN_WEIGHT/LBH_NOTTAKEN_WEIGHT'. +/// +/// Here is a simple example demonstrating how the described algorithm works. +/// +/// BB1 +/// / \ +/// v v +/// BB2 BB3 +/// / \ +/// v v +/// ColdBB UnreachBB +/// +/// Initially, ColdBB is associated with COLD_WEIGHT and UnreachBB with +/// UNREACHABLE_WEIGHT. COLD_WEIGHT is set to BB2 as maximum between its +/// successors. BB1 and BB3 has no explicit estimated weights and assumed to +/// have DEFAULT_WEIGHT. Based on assigned weights branches will have the +/// following probabilities: +/// P(BB1->BB2) = COLD_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) = +/// 0xffff / (0xffff + 0xfffff) = 0.0588(5.9%) +/// P(BB1->BB3) = DEFAULT_WEIGHT_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) = +/// 0xfffff / (0xffff + 0xfffff) = 0.941(94.1%) +/// P(BB2->ColdBB) = COLD_WEIGHT/(COLD_WEIGHT + UNREACHABLE_WEIGHT) = 1(100%) +/// P(BB2->UnreachBB) = +/// UNREACHABLE_WEIGHT/(COLD_WEIGHT+UNREACHABLE_WEIGHT) = 0(0%) +/// +/// If no branch probabilities were calculated on this step then take the next +/// one. +/// +/// Third, apply different kinds of local heuristics for each individual +/// branch until first match. For example probability of a pointer to be null is +/// estimated as PH_TAKEN_WEIGHT/(PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT). If +/// no local heuristic has been matched then branch is left with no explicit +/// probability set and assumed to have default probability. 
class BranchProbabilityInfo { public: BranchProbabilityInfo() = default; BranchProbabilityInfo(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI = nullptr, + DominatorTree *DT = nullptr, PostDominatorTree *PDT = nullptr) { - calculate(F, LI, TLI, PDT); + calculate(F, LI, TLI, DT, PDT); } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) : Probs(std::move(Arg.Probs)), LastF(Arg.LastF), - PostDominatedByUnreachable(std::move(Arg.PostDominatedByUnreachable)), - PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {} + EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {} BranchProbabilityInfo(const BranchProbabilityInfo &) = delete; BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete; @@ -72,8 +133,7 @@ class BranchProbabilityInfo { BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) { releaseMemory(); Probs = std::move(RHS.Probs); - PostDominatedByColdCall = std::move(RHS.PostDominatedByColdCall); - PostDominatedByUnreachable = std::move(RHS.PostDominatedByUnreachable); + EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight); return *this; } @@ -143,11 +203,13 @@ class BranchProbabilityInfo { } void calculate(const Function &F, const LoopInfo &LI, - const TargetLibraryInfo *TLI, PostDominatorTree *PDT); + const TargetLibraryInfo *TLI, DominatorTree *DT, + PostDominatorTree *PDT); /// Forget analysis results for the given basic block. void eraseBlock(const BasicBlock *BB); + // Data structure to track SCCs for handling irreducible loops. class SccInfo { // Enum of types to classify basic blocks in SCC. Basic block belonging to // SCC is 'Inner' until it is either 'Header' or 'Exiting'. Note that a @@ -236,6 +298,8 @@ class BranchProbabilityInfo { const SccInfo &SccI); const BasicBlock *getBlock() const { return BB; } + BasicBlock *getBlock() { return const_cast(BB); } + LoopData getLoopData() const { return LD; } Loop *getLoop() const { return LD.first; } int getSccNum() const { return LD.second; } @@ -249,6 +313,7 @@ class BranchProbabilityInfo { const BasicBlock *const BB = nullptr; LoopData LD = {nullptr, -1}; }; + // Pair of LoopBlocks representing an edge from first to second block. using LoopEdge = std::pair; @@ -258,27 +323,26 @@ class BranchProbabilityInfo { // a pair (PredBlock and an index in the successors) to specify an edge. using Edge = std::pair; - // Default weight value. Used when we don't have information about the edge. - // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of - // the successors have a weight yet. But it doesn't make sense when providing - // weight to an edge that may have siblings with non-zero weights. This can - // be handled various ways, but it's probably fine for an edge with unknown - // weight to just "inherit" the non-zero weight of an adjacent successor. - static const uint32_t DEFAULT_WEIGHT = 16; - DenseMap Probs; /// Track the last function we run over for printing. const Function *LastF = nullptr; + const LoopInfo *LI = nullptr; + /// Keeps information about all SCCs in a function. std::unique_ptr SccI; - /// Track the set of blocks directly succeeded by a returning block. - SmallPtrSet PostDominatedByUnreachable; + /// Keeps mapping of a basic block to its estimated weight. + SmallDenseMap EstimatedBlockWeight; + + /// Keeps mapping of a loop to estimated weight to enter the loop. + SmallDenseMap EstimatedLoopWeight; - /// Track the set of blocks that always lead to a cold call. 
- SmallPtrSet PostDominatedByColdCall; + /// Helper to construct LoopBlock for \p BB. + LoopBlock getLoopBlock(const BasicBlock *BB) const { + return LoopBlock(BB, *LI, *SccI.get()); + } /// Returns true if destination block belongs to some loop and source block is /// either doesn't belong to any loop or belongs to a loop which is not inner @@ -301,18 +365,55 @@ class BranchProbabilityInfo { void getLoopExitBlocks(const LoopBlock &LB, SmallVectorImpl &Exits) const; - void computePostDominatedByUnreachable(const Function &F, - PostDominatorTree *PDT); - void computePostDominatedByColdCall(const Function &F, - PostDominatorTree *PDT); - bool calcUnreachableHeuristics(const BasicBlock *BB); + /// Returns estimated weight for \p BB. None if \p BB has no estimated weight. + Optional getEstimatedBlockWeight(const BasicBlock *BB) const; + + /// Returns estimated weight to enter \p L. In other words it is weight of + /// loop's header block not scaled by trip count. Returns None if \p L has no + /// no estimated weight. + Optional getEstimatedLoopWeight(const LoopData &L) const; + + /// Return estimated weight for \p Edge. Returns None if estimated weight is + /// unknown. + Optional getEstimatedEdgeWeight(const LoopEdge &Edge) const; + + /// Iterates over all edges leading from \p SrcBB to \p Successors and + /// returns maximum of all estimated weights. If at least one edge has unknown + /// estimated weight None is returned. + template + Optional + getMaxEstimatedEdgeWeight(const LoopBlock &SrcBB, + iterator_range Successors) const; + + /// If \p LoopBB has no estimated weight then set it to \p BBWeight and + /// return true. Otherwise \p BB's weight remains unchanged and false is + /// returned. In addition all blocks/loops that might need their weight to be + /// re-estimated are put into BlockWorkList/LoopWorkList. + bool updateEstimatedBlockWeight(LoopBlock &LoopBB, uint32_t BBWeight, + SmallVectorImpl &BlockWorkList, + SmallVectorImpl &LoopWorkList); + + /// Starting from \p LoopBB (including \p LoopBB itself) propagate \p BBWeight + /// up the domination tree. + void propagateEstimatedBlockWeight(const LoopBlock &LoopBB, DominatorTree *DT, + PostDominatorTree *PDT, uint32_t BBWeight, + SmallVectorImpl &WorkList, + SmallVectorImpl &LoopWorkList); + + /// Returns block's weight encoded in the IR. + Optional getInitialEstimatedBlockWeight(const BasicBlock *BB); + + // Computes estimated weights for all blocks in \p F. + void computeEestimateBlockWeight(const Function &F, DominatorTree *DT, + PostDominatorTree *PDT); + + /// Based on computed weights by \p computeEstimatedBlockWeight set + /// probabilities on branches. + bool calcEstimatedHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); - bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); - bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool calcFloatingPointHeuristics(const BasicBlock *BB); - bool calcInvokeHeuristics(const BasicBlock *BB); }; /// Analysis pass which computes \c BranchProbabilityInfo. 
diff --git a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h index f4249f74104cf..3c632f02905a7 100644 --- a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -63,7 +63,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { BranchProbabilityInfo &getCalculated() { if (!Calculated) { assert(F && LI && "call setAnalysis"); - BPI.calculate(*F, *LI, TLI, nullptr); + BPI.calculate(*F, *LI, TLI, nullptr, nullptr); Calculated = true; } return BPI; diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index d4cb46c82e3a6..884ba484ae191 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -61,6 +61,7 @@ INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) @@ -95,8 +96,6 @@ char BranchProbabilityInfoWrapperPass::ID = 0; // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; -// Unlikely edges within a loop are half as likely as other edges -static const uint32_t LBH_UNLIKELY_WEIGHT = 62; /// Unreachable-terminating branch taken probability. /// @@ -105,20 +104,6 @@ static const uint32_t LBH_UNLIKELY_WEIGHT = 62; /// All reachable probability will proportionally share the remaining part. static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); -/// Weight for a branch taken going into a cold block. -/// -/// This is the weight for a branch taken toward a block marked -/// cold. A block is marked cold if it's postdominated by a -/// block containing a call to a cold function. Cold functions -/// are those marked with attribute 'cold'. -static const uint32_t CC_TAKEN_WEIGHT = 4; - -/// Weight for a branch not-taken into a cold block. -/// -/// This is the weight for a branch not taken toward a block marked -/// cold. -static const uint32_t CC_NONTAKEN_WEIGHT = 64; - static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; @@ -135,18 +120,26 @@ static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; /// exceptional case, so the result is unlikely. static const uint32_t FPH_UNO_WEIGHT = 1; -/// Invoke-terminating normal branch taken weight -/// -/// This is the weight for branching to the normal destination of an invoke -/// instruction. We expect this to happen most of the time. Set the weight to an -/// absurdly high value so that nested loops subsume it. -static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; - -/// Invoke-terminating normal branch not-taken weight. -/// -/// This is the weight for branching to the unwind destination of an invoke -/// instruction. This is essentially never taken. -static const uint32_t IH_NONTAKEN_WEIGHT = 1; +/// Set of dedicated "absolute" execution weights for a block. These weights are +/// meaningful relative to each other and their derivatives only. +enum class BlockExecWeight : std::uint32_t { + /// Special weight used for cases with exact zero probability. 
+ ZERO = 0x0, + /// Minimal possible non zero weight. + LOWEST_NON_ZERO = 0x1, + /// Weight to an 'unreachable' block. + UNREACHABLE = ZERO, + /// Weight to a block containing non returning call. + NORETURN = LOWEST_NON_ZERO, + /// Weight to 'unwind' block of an invoke instruction. + UNWIND = LOWEST_NON_ZERO, + /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked + /// with attribute 'cold'. + COLD = 0xffff, + /// Default weight is used in cases when there is no dedicated execution + /// weight set. It is not propagated through the domination line either. + DEFAULT = 0xfffff +}; BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) { // Record SCC numbers of blocks in the CFG to identify irreducible loops. @@ -306,133 +299,6 @@ void BranchProbabilityInfo::getLoopExitBlocks( } } -static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, - SmallVectorImpl &WorkList, - SmallPtrSetImpl &TargetSet) { - SmallVector Descendants; - SmallPtrSet NewItems; - - PDT->getDescendants(const_cast(BB), Descendants); - for (auto *BB : Descendants) - if (TargetSet.insert(BB).second) - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (!TargetSet.count(*PI)) - NewItems.insert(*PI); - WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); -} - -/// Compute a set of basic blocks that are post-dominated by unreachables. -void BranchProbabilityInfo::computePostDominatedByUnreachable( - const Function &F, PostDominatorTree *PDT) { - SmallVector WorkList; - for (auto &BB : F) { - const Instruction *TI = BB.getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable - // since the @llvm.experimental.deoptimize call is expected to - // practically never execute. - BB.getTerminatingDeoptimizeCall()) - UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); - } - } - - while (!WorkList.empty()) { - const BasicBlock *BB = WorkList.pop_back_val(); - if (PostDominatedByUnreachable.count(BB)) - continue; - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(BB->getTerminator())) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); - } - // If all the successors are unreachable, BB is unreachable as well. - else if (!successors(BB).empty() && - llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { - return PostDominatedByUnreachable.count(Succ); - })) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); - } -} - -/// compute a set of basic blocks that are post-dominated by ColdCalls. -void BranchProbabilityInfo::computePostDominatedByColdCall( - const Function &F, PostDominatorTree *PDT) { - SmallVector WorkList; - for (auto &BB : F) - for (auto &I : BB) - if (const CallInst *CI = dyn_cast(&I)) - if (CI->hasFnAttr(Attribute::Cold)) - UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); - - while (!WorkList.empty()) { - const BasicBlock *BB = WorkList.pop_back_val(); - - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. 
- if (auto *II = dyn_cast(BB->getTerminator())) { - if (PostDominatedByColdCall.count(II->getNormalDest())) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); - } - // If all of successor are post dominated then BB is also done. - else if (!successors(BB).empty() && - llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { - return PostDominatedByColdCall.count(Succ); - })) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); - } -} - -/// Calculate edge weights for successors lead to unreachable. -/// -/// Predict that a successor which leads necessarily to an -/// unreachable-terminated block as extremely unlikely. -bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - (void) TI; - assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - assert(!isa(TI) && - "Invokes should have already been handled by calcInvokeHeuristics"); - - SmallVector UnreachableEdges; - SmallVector ReachableEdges; - - for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - if (PostDominatedByUnreachable.count(*I)) - UnreachableEdges.push_back(I.getSuccessorIndex()); - else - ReachableEdges.push_back(I.getSuccessorIndex()); - - // Skip probabilities if all were reachable. - if (UnreachableEdges.empty()) - return false; - - SmallVector EdgeProbabilities( - BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); - if (ReachableEdges.empty()) { - BranchProbability Prob(1, UnreachableEdges.size()); - for (unsigned SuccIdx : UnreachableEdges) - EdgeProbabilities[SuccIdx] = Prob; - setEdgeProbability(BB, EdgeProbabilities); - return true; - } - - auto UnreachableProb = UR_TAKEN_PROB; - auto ReachableProb = - (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) / - ReachableEdges.size(); - - for (unsigned SuccIdx : UnreachableEdges) - EdgeProbabilities[SuccIdx] = UnreachableProb; - for (unsigned SuccIdx : ReachableEdges) - EdgeProbabilities[SuccIdx] = ReachableProb; - - setEdgeProbability(BB, EdgeProbabilities); - return true; -} - // Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. Examine metadata against unreachable // heuristic. The probability of the edge coming to unreachable block is @@ -473,7 +339,12 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); - if (PostDominatedByUnreachable.count(TI->getSuccessor(I - 1))) + const LoopBlock SrcLoopBB = getLoopBlock(BB); + const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1)); + auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); + if (EstimatedWeight && + EstimatedWeight.getValue() <= + static_cast(BlockExecWeight::UNREACHABLE)) UnreachableIdxs.push_back(I - 1); else ReachableIdxs.push_back(I - 1); @@ -578,60 +449,6 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { return true; } -/// Calculate edge weights for edges leading to cold blocks. -/// -/// A cold block is one post-dominated by a block with a call to a -/// cold function. Those edges are unlikely to be taken, so we give -/// them relatively low weight. -/// -/// Return true if we could compute the weights for cold edges. -/// Return false, otherwise. 
-bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - (void) TI; - assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - assert(!isa(TI) && - "Invokes should have already been handled by calcInvokeHeuristics"); - - // Determine which successors are post-dominated by a cold block. - SmallVector ColdEdges; - SmallVector NormalEdges; - for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - if (PostDominatedByColdCall.count(*I)) - ColdEdges.push_back(I.getSuccessorIndex()); - else - NormalEdges.push_back(I.getSuccessorIndex()); - - // Skip probabilities if no cold edges. - if (ColdEdges.empty()) - return false; - - SmallVector EdgeProbabilities( - BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); - if (NormalEdges.empty()) { - BranchProbability Prob(1, ColdEdges.size()); - for (unsigned SuccIdx : ColdEdges) - EdgeProbabilities[SuccIdx] = Prob; - setEdgeProbability(BB, EdgeProbabilities); - return true; - } - - auto ColdProb = BranchProbability::getBranchProbability( - CC_TAKEN_WEIGHT, - (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size())); - auto NormalProb = BranchProbability::getBranchProbability( - CC_NONTAKEN_WEIGHT, - (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size())); - - for (unsigned SuccIdx : ColdEdges) - EdgeProbabilities[SuccIdx] = ColdProb; - for (unsigned SuccIdx : NormalEdges) - EdgeProbabilities[SuccIdx] = NormalProb; - - setEdgeProbability(BB, EdgeProbabilities); - return true; -} - // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison // between two pointer or pointer and NULL will fail. bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { @@ -775,81 +592,324 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, } } -// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges -// as taken, exiting edges as not-taken. -bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, - const LoopInfo &LI) { - LoopBlock LB(BB, LI, *SccI.get()); - if (!LB.belongsToLoop()) +Optional +BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { + auto WeightIt = EstimatedBlockWeight.find(BB); + if (WeightIt == EstimatedBlockWeight.end()) + return None; + return WeightIt->second; +} + +Optional +BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { + auto WeightIt = EstimatedLoopWeight.find(L); + if (WeightIt == EstimatedLoopWeight.end()) + return None; + return WeightIt->second; +} + +Optional +BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const { + // For edges entering a loop take weight of a loop rather than an individual + // block in the loop. + return isLoopEnteringEdge(Edge) + ? getEstimatedLoopWeight(Edge.second.getLoopData()) + : getEstimatedBlockWeight(Edge.second.getBlock()); +} + +template +Optional BranchProbabilityInfo::getMaxEstimatedEdgeWeight( + const LoopBlock &SrcLoopBB, iterator_range Successors) const { + SmallVector Weights; + Optional MaxWeight; + for (const BasicBlock *DstBB : Successors) { + const LoopBlock DstLoopBB = getLoopBlock(DstBB); + auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); + + if (!Weight) + return None; + + if (!MaxWeight || MaxWeight.getValue() < Weight.getValue()) + MaxWeight = Weight; + } + + return MaxWeight; +} + +// Updates \p LoopBB's weight and returns true. 
If \p LoopBB has already +// an associated weight it is unchanged and false is returned. +// +// Please note by the algorithm the weight is not expected to change once set +// thus 'false' status is used to track visited blocks. +bool BranchProbabilityInfo::updateEstimatedBlockWeight( + LoopBlock &LoopBB, uint32_t BBWeight, + SmallVectorImpl &BlockWorkList, + SmallVectorImpl &LoopWorkList) { + BasicBlock *BB = LoopBB.getBlock(); + + // In general, weight is assigned to a block when it has final value and + // can't/shouldn't be changed. However, there are cases when a block + // inherently has several (possibly "contradicting") weights. For example, + // "unwind" block may also contain "cold" call. In that case the first + // set weight is favored and all consequent weights are ignored. + if (!EstimatedBlockWeight.insert({BB, BBWeight}).second) return false; - SmallPtrSet UnlikelyBlocks; - if (LB.getLoop()) - computeUnlikelySuccessors(BB, LB.getLoop(), UnlikelyBlocks); + for (BasicBlock *PredBlock : predecessors(BB)) { + LoopBlock PredLoop = getLoopBlock(PredBlock); + // Add affected block/loop to a working list. + if (isLoopExitingEdge({PredLoop, LoopBB})) { + if (!EstimatedLoopWeight.count(PredLoop.getLoopData())) + LoopWorkList.push_back(PredLoop); + } else if (!EstimatedBlockWeight.count(PredBlock)) + BlockWorkList.push_back(PredBlock); + } + return true; +} - SmallVector BackEdges; - SmallVector ExitingEdges; - SmallVector InEdges; // Edges from header to the loop. - SmallVector UnlikelyEdges; +// Starting from \p BB traverse through dominator blocks and assign \p BBWeight +// to all such blocks that are post dominated by \BB. In other words to all +// blocks that the one is executed if and only if another one is executed. +// Importantly, we skip loops here for two reasons. First weights of blocks in +// a loop should be scaled by trip count (yet possibly unknown). Second there is +// no any value in doing that because that doesn't give any additional +// information regarding distribution of probabilities inside the loop. +// Exception is loop 'enter' and 'exit' edges that are handled in a special way +// at calcEstimatedHeuristics. +// +// In addition, \p WorkList is populated with basic blocks if at leas one +// successor has updated estimated weight. +void BranchProbabilityInfo::propagateEstimatedBlockWeight( + const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT, + uint32_t BBWeight, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &LoopWorkList) { + const BasicBlock *BB = LoopBB.getBlock(); + const auto *DTStartNode = DT->getNode(BB); + const auto *PDTStartNode = PDT->getNode(BB); + + // TODO: Consider propagating weight down the domination line as well. + for (const auto *DTNode = DTStartNode; DTNode != nullptr; + DTNode = DTNode->getIDom()) { + auto *DomBB = DTNode->getBlock(); + // Consider blocks which lie on one 'line'. + if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB))) + // If BB doesn't post dominate DomBB it will not post dominate dominators + // of DomBB as well. 
+ break; - for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - LoopBlock SuccLB(*I, LI, *SccI.get()); - LoopEdge Edge(LB, SuccLB); - bool IsUnlikelyEdge = LB.getLoop() && UnlikelyBlocks.contains(*I); - - if (IsUnlikelyEdge) - UnlikelyEdges.push_back(I.getSuccessorIndex()); - else if (isLoopExitingEdge(Edge)) - ExitingEdges.push_back(I.getSuccessorIndex()); - else if (isLoopBackEdge(Edge)) - BackEdges.push_back(I.getSuccessorIndex()); - else { - InEdges.push_back(I.getSuccessorIndex()); + LoopBlock DomLoopBB = getLoopBlock(DomBB); + const LoopEdge Edge{DomLoopBB, LoopBB}; + // Don't propagate weight to blocks belonging to different loops. + if (!isLoopEnteringExitingEdge(Edge)) { + if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList, + LoopWorkList)) + // If DomBB has weight set then all it's predecessors are already + // processed (since we propagate weight up to the top of IR each time). + break; + } else if (isLoopExitingEdge(Edge)) { + LoopWorkList.push_back(DomLoopBB); } } +} + +Optional BranchProbabilityInfo::getInitialEstimatedBlockWeight( + const BasicBlock *BB) { + // Returns true if \p BB has call marked with "NoReturn" attribute. + auto hasNoReturn = [&](const BasicBlock *BB) { + for (const auto &I : reverse(*BB)) + if (const CallInst *CI = dyn_cast(&I)) + if (CI->hasFnAttr(Attribute::NoReturn)) + return true; - if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty()) return false; + }; + + // Important note regarding the order of checks. They are ordered by weight + // from lowest to highest. Doing that allows to avoid "unstable" results + // when several conditions heuristics can be applied simultaneously. + if (isa(BB->getTerminator()) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since it is expected to practically never execute. + // TODO: Should we actually treat as never returning call? + BB->getTerminatingDeoptimizeCall()) + return hasNoReturn(BB) + ? static_cast(BlockExecWeight::NORETURN) + : static_cast(BlockExecWeight::UNREACHABLE); + + // Check if the block is 'unwind' handler of some invoke instruction. + for (const auto *Pred : predecessors(BB)) + if (Pred) + if (const auto *II = dyn_cast(Pred->getTerminator())) + if (II->getUnwindDest() == BB) + return static_cast(BlockExecWeight::UNWIND); + + // Check if the block contains 'cold' call. + for (const auto &I : *BB) + if (const CallInst *CI = dyn_cast(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + return static_cast(BlockExecWeight::COLD); + + return None; +} - // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and - // normalize them so that they sum up to one. - unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + - (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + - (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) + - (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT); +// Does RPO traversal over all blocks in \p F and assigns weights to +// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its +// best to propagate the weight to up/down the IR. +void BranchProbabilityInfo::computeEestimateBlockWeight( + const Function &F, DominatorTree *DT, PostDominatorTree *PDT) { + SmallVector BlockWorkList; + SmallVector LoopWorkList; + + // By doing RPO we make sure that all predecessors already have weights + // calculated before visiting theirs successors. 
+ ReversePostOrderTraversal RPOT(&F); + for (const auto *BB : RPOT) + if (auto BBWeight = getInitialEstimatedBlockWeight(BB)) + // If we were able to find estimated weight for the block set it to this + // block and propagate up the IR. + propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, + BBWeight.getValue(), BlockWorkList, + LoopWorkList); + + // BlockWorklist/LoopWorkList contains blocks/loops with at least one + // successor/exit having estimated weight. Try to propagate weight to such + // blocks/loops from successors/exits. + // Process loops and blocks. Order is not important. + do { + while (!LoopWorkList.empty()) { + const LoopBlock LoopBB = LoopWorkList.pop_back_val(); + + if (EstimatedLoopWeight.count(LoopBB.getLoopData())) + continue; - SmallVector EdgeProbabilities( - BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); - if (uint32_t numBackEdges = BackEdges.size()) { - BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); - auto Prob = TakenProb / numBackEdges; - for (unsigned SuccIdx : BackEdges) - EdgeProbabilities[SuccIdx] = Prob; - } + SmallVector Exits; + getLoopExitBlocks(LoopBB, Exits); + auto LoopWeight = getMaxEstimatedEdgeWeight( + LoopBB, make_range(Exits.begin(), Exits.end())); - if (uint32_t numInEdges = InEdges.size()) { - BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); - auto Prob = TakenProb / numInEdges; - for (unsigned SuccIdx : InEdges) - EdgeProbabilities[SuccIdx] = Prob; - } + if (LoopWeight) { + // If we never exit the loop then we can enter it once at maximum. + if (LoopWeight <= static_cast(BlockExecWeight::UNREACHABLE)) + LoopWeight = static_cast(BlockExecWeight::LOWEST_NON_ZERO); + + EstimatedLoopWeight.insert( + {LoopBB.getLoopData(), LoopWeight.getValue()}); + // Add all blocks entering the loop into working list. + getLoopEnterBlocks(LoopBB, BlockWorkList); + } + } - if (uint32_t numExitingEdges = ExitingEdges.size()) { - BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT, - Denom); - auto Prob = NotTakenProb / numExitingEdges; - for (unsigned SuccIdx : ExitingEdges) - EdgeProbabilities[SuccIdx] = Prob; + while (!BlockWorkList.empty()) { + // We can reach here only if BlockWorkList is not empty. + const BasicBlock *BB = BlockWorkList.pop_back_val(); + if (EstimatedBlockWeight.count(BB)) + continue; + + // We take maximum over all weights of successors. In other words we take + // weight of "hot" path. In theory we can probably find a better function + // which gives higher accuracy results (comparing to "maximum") but I + // can't + // think of any right now. And I doubt it will make any difference in + // practice. + const LoopBlock LoopBB = getLoopBlock(BB); + auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB)); + + if (MaxWeight) + propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(), + BlockWorkList, LoopWorkList); + } + } while (!BlockWorkList.empty() || !LoopWorkList.empty()); +} + +// Calculate edge probabilities based on block's estimated weight. +// Note that gathered weights were not scaled for loops. Thus edges entering +// and exiting loops requires special processing. 
+bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { + assert(BB->getTerminator()->getNumSuccessors() > 1 && + "expected more than one successor!"); + + const LoopBlock LoopBB = getLoopBlock(BB); + + SmallPtrSet UnlikelyBlocks; + uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; + if (LoopBB.getLoop()) + computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks); + + // Changed to 'true' if at least one successor has estimated weight. + bool FoundEstimatedWeight = false; + SmallVector SuccWeights; + uint64_t TotalWeight = 0; + // Go over all successors of BB and put their weights into SuccWeights. + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + const BasicBlock *SuccBB = *I; + Optional Weight; + const LoopBlock SuccLoopBB = getLoopBlock(SuccBB); + const LoopEdge Edge{LoopBB, SuccLoopBB}; + + Weight = getEstimatedEdgeWeight(Edge); + + if (isLoopExitingEdge(Edge) && + // Avoid adjustment of ZERO weight since it should remain unchanged. + Weight != static_cast(BlockExecWeight::ZERO)) { + // Scale down loop exiting weight by trip count. + Weight = std::max( + static_cast(BlockExecWeight::LOWEST_NON_ZERO), + Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)) / + TC); + } + bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB); + if (IsUnlikelyEdge && + // Avoid adjustment of ZERO weight since it should remain unchanged. + Weight != static_cast(BlockExecWeight::ZERO)) { + // 'Unlikely' blocks have twice lower weight. + Weight = std::max( + static_cast(BlockExecWeight::LOWEST_NON_ZERO), + Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)) / + 2); + } + + if (Weight) + FoundEstimatedWeight = true; + + auto WeightVal = + Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)); + TotalWeight += WeightVal; + SuccWeights.push_back(WeightVal); } - if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) { - BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT, - Denom); - auto Prob = UnlikelyProb / numUnlikelyEdges; - for (unsigned SuccIdx : UnlikelyEdges) - EdgeProbabilities[SuccIdx] = Prob; + // If non of blocks have estimated weight bail out. + // If TotalWeight is 0 that means weight of each successor is 0 as well and + // equally likely. Bail out early to not deal with devision by zero. + if (!FoundEstimatedWeight || TotalWeight == 0) + return false; + + assert(SuccWeights.size() == succ_size(BB) && "Missed successor?"); + const unsigned SuccCount = SuccWeights.size(); + + // If the sum of weights does not fit in 32 bits, scale every weight down + // accordingly. + if (TotalWeight > UINT32_MAX) { + uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; + TotalWeight = 0; + for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { + SuccWeights[Idx] /= ScalingFactor; + if (SuccWeights[Idx] == static_cast(BlockExecWeight::ZERO)) + SuccWeights[Idx] = + static_cast(BlockExecWeight::LOWEST_NON_ZERO); + TotalWeight += SuccWeights[Idx]; + } + assert(TotalWeight <= UINT32_MAX && "Total weight overflows"); } + // Finally set probabilities to edges according to estimated block weights. 
+ SmallVector EdgeProbabilities( + SuccCount, BranchProbability::getUnknown()); + + for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { + EdgeProbabilities[Idx] = + BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); + } setEdgeProbability(BB, EdgeProbabilities); return true; } @@ -1015,18 +1075,6 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { return true; } -bool BranchProbabilityInfo::calcInvokeHeuristics(const BasicBlock *BB) { - const InvokeInst *II = dyn_cast(BB->getTerminator()); - if (!II) - return false; - - BranchProbability TakenProb(IH_TAKEN_WEIGHT, - IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT); - setEdgeProbability( - BB, SmallVector({TakenProb, TakenProb.getCompl()})); - return true; -} - void BranchProbabilityInfo::releaseMemory() { Probs.clear(); Handles.clear(); @@ -1202,26 +1250,34 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { } } -void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, +void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, const TargetLibraryInfo *TLI, + DominatorTree *DT, PostDominatorTree *PDT) { LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. - assert(PostDominatedByUnreachable.empty()); - assert(PostDominatedByColdCall.empty()); + LI = &LoopI; SccI = std::make_unique(F); + assert(EstimatedBlockWeight.empty()); + assert(EstimatedLoopWeight.empty()); + + std::unique_ptr DTPtr; std::unique_ptr PDTPtr; + if (!DT) { + DTPtr = std::make_unique(const_cast(F)); + DT = DTPtr.get(); + } + if (!PDT) { PDTPtr = std::make_unique(const_cast(F)); PDT = PDTPtr.get(); } - computePostDominatedByUnreachable(F, PDT); - computePostDominatedByColdCall(F, PDT); + computeEestimateBlockWeight(F, DT, PDT); // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. 
@@ -1233,13 +1289,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, continue; if (calcMetadataWeights(BB)) continue; - if (calcInvokeHeuristics(BB)) - continue; - if (calcUnreachableHeuristics(BB)) - continue; - if (calcColdCallHeuristics(BB)) - continue; - if (calcLoopBranchHeuristics(BB, LI)) + if (calcEstimatedHeuristics(BB)) continue; if (calcPointerHeuristics(BB)) continue; @@ -1249,8 +1299,8 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, continue; } - PostDominatedByUnreachable.clear(); - PostDominatedByColdCall.clear(); + EstimatedLoopWeight.clear(); + EstimatedBlockWeight.clear(); SccI.reset(); if (PrintBranchProb && @@ -1268,6 +1318,7 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage( AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); } @@ -1276,9 +1327,10 @@ bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis().getLoopInfo(); const TargetLibraryInfo &TLI = getAnalysis().getTLI(F); + DominatorTree &DT = getAnalysis().getDomTree(); PostDominatorTree &PDT = getAnalysis().getPostDomTree(); - BPI.calculate(F, LI, &TLI, &PDT); + BPI.calculate(F, LI, &TLI, &DT, &PDT); return false; } @@ -1295,6 +1347,7 @@ BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BranchProbabilityInfo BPI; BPI.calculate(F, AM.getResult(F), &AM.getResult(F), + &AM.getResult(F), &AM.getResult(F)); return BPI; } diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index 385666a21d05d..6f3d4d536c401 100644 --- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -37,7 +37,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) LI.analyze(DT); // Then compute BranchProbabilityInfo. - BranchProbabilityInfo BPI(*F, LI); + BranchProbabilityInfo BPI(*F, LI, nullptr, &DT, nullptr); // Finally compute BFI. OwnedBFI = std::make_unique(*F, BPI, LI); diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 3ca5b985c365e..4f97641e2027c 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -362,7 +362,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, // For the new PM, we also can't use BranchProbabilityInfo as an analysis // pass. Function analyses need to be preserved across loop transformations // but BPI is not preserved, hence a newly built one is needed. 
- BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI); + BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr); LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); diff --git a/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll b/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll index 2db9e024c15b1..c4ae0191eaab6 100644 --- a/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll +++ b/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll @@ -9,7 +9,7 @@ define void @test1() { entry: br label %loop -; CHECK-NEXT: loop: float = 32.0 +; CHECK-NEXT: loop: float = 16.5 loop: switch i32 undef, label %loop [ i32 0, label %return diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll index 901dc9fce645b..84d801aab4cb0 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -124,8 +124,8 @@ define i32 @test5(i32 %a, i32 %b, i1 %flag) { ; CHECK: Printing analysis {{.*}} for function 'test5' entry: br i1 %flag, label %then, label %else -; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> else probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] then: call void @coldfunc() @@ -145,15 +145,16 @@ define i32 @test_cold_loop(i32 %a, i32 %b) { entry: %cond1 = icmp eq i32 %a, 42 br i1 %cond1, label %header, label %exit - +; CHECK: edge entry -> header probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> exit probability is 0x40000000 / 0x80000000 = 50.00% header: br label %body body: %cond2 = icmp eq i32 %b, 42 br i1 %cond2, label %header, label %exit -; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% - +; CHECK: edge body -> header probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge body -> exit probability is 0x0041edfd / 0x80000000 = 0.20% exit: call void @coldfunc() ret i32 %b @@ -165,8 +166,8 @@ define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) ; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites_with_prof' entry: br i1 %flag, label %then, label %else -; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> else probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] then: br i1 %flag2, label %then2, label %else2, !prof !3 @@ -206,8 +207,8 @@ define i32 @test_cold_call_sites(i32* %a) { ; after that is fixed. 
; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites' -; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> else probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -238,14 +239,14 @@ entry: ; Edge "entry->if.end" should have higher probability based on the cold call ; heuristic which treat %if.then as a cold block because the normal destination ; of the invoke instruction in %if.then is post-dominated by ColdFunc(). -; CHECK: edge entry -> if.then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> if.end probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] if.then: invoke i32 @InvokeCall() to label %invoke.cont unwind label %lpad -; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge if.then -> invoke.cont probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge if.then -> lpad probability is 0x00008000 / 0x80000000 = 0.00% invoke.cont: call void @ColdFunc() #0 @@ -267,13 +268,12 @@ entry: ; CHECK: edge entry -> if.then probability is 0x40000000 / 0x80000000 = 50.00% ; CHECK: edge entry -> if.end probability is 0x40000000 / 0x80000000 = 50.00% - if.then: invoke i32 @InvokeCall() to label %invoke.cont unwind label %lpad ; The cold call heuristic should not kick in when the cold callsite is in EH path. -; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% invoke.cont: br label %if.end @@ -292,16 +292,16 @@ if.end: define i32 @test_invoke_code_callsite3(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: br i1 %c, label %if.then, label %if.end -; CHECK: edge entry -> if.then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> if.end probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] if.then: invoke i32 @InvokeCall() to label %invoke.cont unwind label %lpad ; Regardless of cold calls, edge weights from a invoke instruction should be ; determined by the invoke heuristic. 
-; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
-; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00%
+; CHECK: edge if.then -> invoke.cont probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge if.then -> lpad probability is 0x00008000 / 0x80000000 = 0.00%
 
 invoke.cont:
   call void @ColdFunc() #0
diff --git a/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll b/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll
index 541d7d954fb24..b40b5d7913929 100644
--- a/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll
+++ b/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll
@@ -9,8 +9,8 @@ entry:
   %cond = icmp eq i32 %a, 42
   br i1 %cond, label %exit, label %deopt
 
-; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge]
-; CHECK: edge entry -> deopt probability is 0x00000001 / 0x80000000 = 0.00%
+; CHECK: edge entry -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> deopt probability is 0x00000000 / 0x80000000 = 0.00%
 
 deopt:
   %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
diff --git a/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll b/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll
new file mode 100644
index 0000000000000..bea32bbe5979b
--- /dev/null
+++ b/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll
@@ -0,0 +1,107 @@
+; RUN: opt -analyze -branch-prob < %s | FileCheck %s
+; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s
+
+declare i32* @"personality_function"() #1
+declare void @foo(i32)
+declare void @bar()
+declare void @llvm.experimental.deoptimize.isVoid(...)
+declare void @cold() cold
+
+; Even though the likelihood of the 'invoke' throwing an exception is assessed as low,
+; all other paths are even less likely. Check that the hot path leads to the exception handler.
+define void @test1(i32 %0) personality i32* ()* @"personality_function" !prof !1 {
+;CHECK: edge entry -> unreached probability is 0x00000001 / 0x80000000 = 0.00%
+;CHECK: edge entry -> invoke probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge]
+;CHECK: edge invoke -> invoke.cont.unreached probability is 0x00000000 / 0x80000000 = 0.00%
+;CHECK: edge invoke -> land.pad probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+;CHECK: edge land.pad -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+entry:
+  br i1 undef, label %unreached, label %invoke, !prof !2
+invoke:
+  invoke void @foo(i32 %0)
+          to label %invoke.cont.unreached unwind label %land.pad
+invoke.cont.unreached:
+  call void (...) 
@llvm.experimental.deoptimize.isVoid(i32 10) [ "deopt"() ] + ret void + +unreached: + unreachable + +land.pad: + %v20 = landingpad { i8*, i32 } + cleanup + %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + br label %exit + +exit: + call void @bar() + ret void +} + +define void @test2(i32 %0) personality i32* ()* @"personality_function" { +;CHECK: edge entry -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge entry -> invoke probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> invoke.cont.cold probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> land.pad probability is 0x00008000 / 0x80000000 = 0.00% +;CHECK: edge land.pad -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %unreached, label %invoke +invoke: + invoke void @foo(i32 %0) + to label %invoke.cont.cold unwind label %land.pad +invoke.cont.cold: + call void @cold() + ret void + +unreached: + unreachable + +land.pad: + %v20 = landingpad { i8*, i32 } + cleanup + %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + br label %exit + +exit: + call void @bar() + ret void +} + +define void @test3(i32 %0) personality i32* ()* @"personality_function" { +;CHECK: edge entry -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge entry -> invoke probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> invoke.cont.cold probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> land.pad probability is 0x00008000 / 0x80000000 = 0.00% +;CHECK: edge land.pad -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +entry: + br i1 undef, label %unreached, label %invoke +invoke: + invoke void @foo(i32 %0) + to label %invoke.cont.cold unwind label %land.pad +invoke.cont.cold: + call void @cold() + ret void + +unreached: + unreachable + +land.pad: + %v20 = landingpad { i8*, i32 } + cleanup + %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + call void @cold() + br label %exit + +exit: + call void @bar() + ret void +} + + +attributes #1 = { nounwind } + +!1 = !{!"function_entry_count", i64 32768} +!2 = !{!"branch_weights", i32 1, i32 983040} + diff --git a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll index 26e041b652841..b3fa908064eb6 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll @@ -428,9 +428,8 @@ for.body: %inc = add nsw i32 %count.0, 1 %cmp1 = icmp sgt i32 %count.0, 6 br i1 %cmp1, label %if.then, label %for.inc -; CHECK: edge for.body -> if.then probability is 0x2aaaaaab / 0x80000000 = 33.33% -; CHECK: edge for.body -> for.inc probability is 0x55555555 / 0x80000000 = 66.67% - +; CHECK: edge for.body -> if.then probability is 0x2aaaa8e4 / 0x80000000 = 33.33% +; CHECK: edge for.body -> for.inc probability is 0x5555571c / 0x80000000 = 66.67% if.then: store i32 %add, i32* %arrayidx, align 4 br label %for.inc @@ -521,3 +520,207 @@ exit: } declare i32 @InvokeCall() +declare void @cold() cold + +; If loop has single exit and it leads to 'cold' block then edge leading to loop enter +; should be considered 'cold' as well. 
+define void @test13() { +; CHECK: edge entry -> loop probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge loop -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge loop -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %loop, label %cold + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + +; This is the same case as test13 but with additional loop 'preheader' block. +define void @test14() { +; CHECK: edge entry -> preheader probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge preheader -> loop probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge loop -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge loop -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %preheader, label %exit + +preheader: + br label %loop + +loop: + %i.0 = phi i32 [ 0, %preheader ], [ %inc, %loop ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %loop, label %cold + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + +; If loop has multiple low probability exits then edge leading to loop enter +; should be considered low probable as well. +define void @test15() { +; CHECK: edge entry -> loop probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge loop -> cont probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge loop -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK: edge cont -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge cont -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %cont ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %cont, label %unreached + +cont: + br i1 undef, label %loop, label %cold + +unreached: + unreachable + + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + +; This is the same case as test15 but with additional loop 'preheader' block. 
+define void @test16() {
+; CHECK: edge entry -> preheader probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge preheader -> loop probability is 0x078780e3 / 0x80000000 = 5.88%
+; CHECK: edge preheader -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge]
+; CHECK: edge loop -> cont probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge loop -> unreached probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK: edge cont -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge]
+; CHECK: edge cont -> cold probability is 0x0041edfd / 0x80000000 = 0.20%
+; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+entry:
+  br label %preheader
+
+preheader:
+  br i1 undef, label %loop, label %exit
+
+loop:
+  %i.0 = phi i32 [ 0, %preheader ], [ %inc, %cont ]
+  %inc = add nsw i32 %i.0, 1
+  br i1 undef, label %cont, label %unreached
+
+cont:
+  br i1 undef, label %loop, label %cold
+
+unreached:
+  unreachable
+
+
+cold:
+  call void @cold()
+  br label %exit
+
+exit:
+  ret void
+}
+
+declare void @abort() noreturn
+
+; Check that 'preheader' has 50/50 probability since there is one 'normal' exit.
+; Check that exits to 'cold' and 'noreturn' have lower probability than the 'normal' exit.
+define void @test17() {
+; CHECK: edge entry -> preheader probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge preheader -> loop probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge preheader -> exit probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge loop -> cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge loop -> noreturn probability is 0x00000800 / 0x80000000 = 0.00%
+; CHECK: edge cont -> cont2 probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge]
+; CHECK: edge cont -> cold probability is 0x0041edfd / 0x80000000 = 0.20%
+; CHECK: edge cont2 -> loop probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge cont2 -> exit probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+entry:
+  br label %preheader
+
+preheader:
+  br i1 undef, label %loop, label %exit
+
+loop:
+  %i.0 = phi i32 [ 0, %preheader ], [ %inc, %cont2 ]
+  %inc = add nsw i32 %i.0, 1
+  br i1 undef, label %cont, label %noreturn
+
+cont:
+  br i1 undef, label %cont2, label %cold
+
+cont2:
+  br i1 undef, label %loop, label %exit
+
+noreturn:
+  call void @abort()
+  unreachable
+
+cold:
+  call void @cold()
+  br label %exit
+
+exit:
+  ret void
+}
+
+
+; This is a case with two loops, one nested inside the other. The nested loop has a
+; low-probability exit, which increases the probability of taking the exit in the top-level loop.
+define void @test18() { +; CHECK: edge entry -> top.loop probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge top.loop -> loop probability is 0x546cd4b7 / 0x80000000 = 65.96% +; CHECK: edge top.loop -> exit probability is 0x2b932b49 / 0x80000000 = 34.04% +; CHECK: edge loop -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge loop -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> top.loop probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br label %top.loop + +top.loop: + %j.0 = phi i32 [ 0, %entry ], [ %j.inc, %cold ] + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ %j.0, %top.loop ], [ %inc, %loop ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %loop, label %cold + +cold: + call void @cold() + %j.inc = add nsw i32 %j.0, 1 + br label %top.loop + +exit: + ret void +} + + + diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll index 8c23a1d3bfc9a..e42a2d83b0c6a 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -9,8 +9,8 @@ define i32 @test1(i32 %a, i32 %b) { entry: %cond = icmp eq i32 %a, 42 br i1 %cond, label %exit, label %abort -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> abort probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> abort probability is 0x00000800 / 0x80000000 = 0.00% abort: call void @abort() noreturn @@ -27,11 +27,11 @@ entry: i32 2, label %case_b i32 3, label %case_c i32 4, label %case_d] -; CHECK: edge entry -> exit probability is 0x7ffffffc / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> case_a probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_b probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_c probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_d probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffe000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> case_a probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_b probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_c probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_d probability is 0x00000800 / 0x80000000 = 0.00% case_a: br label %case_b @@ -56,8 +56,8 @@ define i32 @test3(i32 %a, i32 %b) { entry: %cond1 = icmp eq i32 %a, 42 br i1 %cond1, label %exit, label %dom -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> dom probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> dom probability is 0x00000800 / 0x80000000 = 0.00% dom: %cond2 = icmp ult i32 %a, 42 @@ -85,8 +85,8 @@ define i32 @test4(i32 %a, i32 %b) { entry: %cond1 = icmp eq i32 %a, 42 br i1 %cond1, label %header, label %exit -; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> header probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] 
header: br label %body @@ -94,9 +94,8 @@ header: body: %cond2 = icmp eq i32 %a, 42 br i1 %cond2, label %header, label %abort -; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% - +; CHECK: edge body -> header probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge body -> abort probability is 0x00000800 / 0x80000000 = 0.00% abort: call void @abort() noreturn unreachable @@ -113,15 +112,15 @@ define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast entry: %cmp = icmp sge i32 %idx, %limit br i1 %cmp, label %if.then, label %if.end -; CHECK: edge entry -> if.then probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> if.end probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> if.end probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] if.then: ; preds = %entry %exception = call i8* @__cxa_allocate_exception(i64 1) #0 invoke i32 @smallFunction(i32 %idx) to label %invoke.cont unwind label %lpad -; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge if.then -> invoke.cont probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge if.then -> lpad probability is 0x40000000 / 0x80000000 = 50.00% invoke.cont: ; preds = %if.then call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 diff --git a/llvm/test/Analysis/BranchProbabilityInfo/unreachable.ll b/llvm/test/Analysis/BranchProbabilityInfo/unreachable.ll new file mode 100644 index 0000000000000..7ebdba2168ec4 --- /dev/null +++ b/llvm/test/Analysis/BranchProbabilityInfo/unreachable.ll @@ -0,0 +1,154 @@ +; RUN: opt -analyze -branch-prob < %s | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +declare void @bar() cold + +; Both 'l1' and 'r1' has one edge leading to 'cold' and another one to +; 'unreachable' blocks. Check that 'cold' paths are preferred. Also ensure both +; paths from 'entry' block are equal. +define void @test1(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge entry -> r1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge l1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %cold, label %unreached + +r1: + br i1 undef, label %unreached, label %cold + +unreached: + unreachable + +cold: + call void @bar() + ret void +} + +; Both edges of 'l1' leads to 'cold' blocks while one edge of 'r1' leads to +; 'unreachable' block. Check that 'l1' has 50/50 while 'r1' has 0/100 +; distributuion. Also ensure both paths from 'entry' block are equal. 
+define void @test2(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge entry -> r1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> cold probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> cold2 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge r1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %cold, label %cold2 + +r1: + br i1 undef, label %unreached, label %cold + +unreached: + unreachable + +cold: + call void @bar() + ret void + +cold2: + call void @bar() + ret void +} + +; Both edges of 'r1' leads to 'unreachable' blocks while one edge of 'l1' leads to +; 'cold' block. Ensure that path leading to 'cold' block is preferred. +define void @test3(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge entry -> r1 probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge l1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge l1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> unreached probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge r1 -> unreached2 probability is 0x40000000 / 0x80000000 = 50.00% + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %cold, label %unreached + +r1: + br i1 undef, label %unreached, label %unreached2 + +unreached: + unreachable + +unreached2: + unreachable + +cold: + call void @bar() + ret void +} + +; Left edge of 'entry' leads to 'cold' block while right edge is 'normal' continuation. +; Check that we able to propagate 'cold' weight to 'entry' block. Also ensure +; both edges from 'l1' are equally likely. +define void @test4(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x078780e3 / 0x80000000 = 5.88% +;CHECK: edge entry -> r1 probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +;CHECK: edge l1 -> l2 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> r2 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l2 -> to.cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge r2 -> to.cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge to.cold -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %l2, label %r2 + +l2: + br label %to.cold + +r2: + br label %to.cold + +to.cold: + br label %cold + +r1: + ret void + +cold: + call void @bar() + ret void +} + +; Check that most likely path from 'entry' to 'l2' through 'r1' is preferred. 
+define void @test5(i32 %0) { +;CHECK: edge entry -> cold probability is 0x078780e3 / 0x80000000 = 5.88% +;CHECK: edge entry -> r1 probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +;CHECK: edge cold -> l2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge r1 -> l2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge r1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% + +entry: + br i1 undef, label %cold, label %r1 + +cold: + call void @bar() + br label %l2 + +r1: + br i1 undef, label %l2, label %unreached + +l2: + ret void + +unreached: + unreachable +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll index 95dc96ee63efd..a10148f1ffdc1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll @@ -11,7 +11,7 @@ declare i32 @hoge(...) define void @pluto() align 2 personality i8* bitcast (i32 (...)* @hoge to i8*) { ; CHECK-LABEL: @pluto ; CHECK: bb.1.bb -; CHECK: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) +; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK: EH_LABEL ; CHECK: G_BR %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll index 416a72d51f992..c2f1bde56bff0 100644 --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -14,7 +14,7 @@ entry: define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 { ; GCN-LABEL: name: test_return_to_epilog_into_end_block ; GCN: bb.0.entry: - ; GCN: successors: %bb.1(0x7fffffff), %bb.2(0x00000001) + ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; GCN: liveins: $sgpr2, $vgpr0 ; GCN: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc ; GCN: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -49,7 +49,7 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, ; GCN: liveins: $vgpr0 ; GCN: S_BRANCH %bb.5 ; GCN: bb.2.else.if.cond: - ; GCN: successors: %bb.3(0x7fffffff), %bb.4(0x00000001) + ; GCN: successors: %bb.3(0x80000000), %bb.4(0x00000000) ; GCN: liveins: $sgpr3, $vgpr1 ; GCN: S_CMP_LT_I32 killed renamable $sgpr3, 1, implicit-def $scc ; GCN: S_CBRANCH_SCC1 %bb.4, implicit killed $scc diff --git a/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll index 2f4143547a01a..af2009c7a2526 100644 --- a/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll +++ b/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll @@ -22,7 +22,7 @@ entry: ; for.body -> for.cond.backedge (100%) ; -> cond.false.i (0%) ; CHECK: bb.1.for.body: -; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000001) +; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000000) for.body: br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1 diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll index 9720df795eb6d..ef907ee3ff499 100644 --- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -168,7 +168,7 @@ entry: ; CHECK-LABEL: cmp_slt0 ; CHECK: sub ; CHECK: cmn -; CHECK: bgt +; CHECK: ble %load = load i32, i32* @t, align 4 %sub = sub i32 %load, 17 %cmp = icmp slt i32 %sub, 0 diff --git 
a/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll b/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll index 8d45c3cd06413..eab750d2b8466 100644 --- a/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll +++ b/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll @@ -23,7 +23,7 @@ define void @main() { ; CHECK-NEXT: bne .LBB0_8 ; CHECK-NEXT: .LBB0_2: @ %for.cond14.preheader.us.i.i.i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cbnz r0, .LBB0_7 +; CHECK-NEXT: cbnz r0, .LBB0_6 ; CHECK-NEXT: @ %bb.3: @ %for.cond14.preheader.us.i.i.i ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: lsls r1, r0, #2 @@ -34,22 +34,22 @@ define void @main() { ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LJTI0_0: ; CHECK-NEXT: b.w .LBB0_5 -; CHECK-NEXT: b.w .LBB0_7 ; CHECK-NEXT: b.w .LBB0_6 -; CHECK-NEXT: b.w .LBB0_8 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 ; CHECK-NEXT: b.w .LBB0_7 +; CHECK-NEXT: b.w .LBB0_8 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 ; CHECK-NEXT: b.w .LBB0_5 ; CHECK-NEXT: .LBB0_5: @ %for.cond14.preheader.us.i.i.i ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: b .LBB0_2 -; CHECK-NEXT: .LBB0_6: @ %lbl_1394.i.i.i.loopexit -; CHECK-NEXT: .LBB0_7: @ %func_1.exit.loopexit +; CHECK-NEXT: .LBB0_6: @ %func_1.exit.loopexit +; CHECK-NEXT: .LBB0_7: @ %lbl_1394.i.i.i.loopexit ; CHECK-NEXT: .LBB0_8: @ %for.end476.i.i.i.loopexit entry: %0 = load volatile i32**, i32*** @g_566, align 4 diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll index 39e3e80f542cc..b4166bde22ab8 100644 --- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll @@ -23,8 +23,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-NEXT: mfcr r12 ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) -; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: .cfi_offset r29, -24 ; CHECK-NEXT: .cfi_offset r30, -16 @@ -32,32 +32,31 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-NEXT: .cfi_offset cr3, 8 ; CHECK-NEXT: .cfi_offset cr4, 8 ; CHECK-NEXT: lwz r3, 0(r3) -; CHECK-NEXT: std r29, 56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill ; CHECK-NEXT: paddi r29, 0, .LJTI0_0@PCREL, 1 ; CHECK-NEXT: srwi r4, r3, 4 ; CHECK-NEXT: srwi r3, r3, 5 ; CHECK-NEXT: andi. r4, r4, 1 ; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: crmove 4*cr4+lt, gt +; CHECK-NEXT: crmove 4*cr2+gt, gt ; CHECK-NEXT: andi. 
r3, r3, 1 -; CHECK-NEXT: setnbc r3, gt -; CHECK-NEXT: stw r3, 52(r1) +; CHECK-NEXT: crmove 4*cr2+lt, gt ; CHECK-NEXT: cmplwi cr3, r3, 336 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: sldi r30, r3, 2 ; CHECK-NEXT: b .LBB0_2 -; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb43 ; CHECK-NEXT: # ; CHECK-NEXT: bl call_1@notoc ; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: setnbc r3, 4*cr2+eq +; CHECK-NEXT: setnbc r3, 4*cr4+eq ; CHECK-NEXT: stb r4, 0(r3) ; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %bb5 ; CHECK-NEXT: # -; CHECK-NEXT: bc 12, 4*cr4+lt, .LBB0_31 +; CHECK-NEXT: bc 12, 4*cr2+gt, .LBB0_31 ; CHECK-NEXT: # %bb.3: # %bb10 ; CHECK-NEXT: # ; CHECK-NEXT: bgt cr3, .LBB0_5 @@ -66,7 +65,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: lwz r5, 0(r3) ; CHECK-NEXT: rlwinm r4, r5, 0, 21, 22 -; CHECK-NEXT: cmpwi cr2, r4, 512 +; CHECK-NEXT: cmpwi cr4, r4, 512 ; CHECK-NEXT: lwax r4, r30, r29 ; CHECK-NEXT: add r4, r4, r29 ; CHECK-NEXT: mtctr r4 @@ -105,11 +104,11 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_12 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_13: # %bb47 +; CHECK-NEXT: .LBB0_13: # %bb61 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_13 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_14: # %bb58 +; CHECK-NEXT: .LBB0_14: # %bb47 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_14 ; CHECK-NEXT: .p2align 4 @@ -121,51 +120,51 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_17: # %bb23 +; CHECK-NEXT: .LBB0_17: # %bb59 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_17 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_18: # %bb60 +; CHECK-NEXT: .LBB0_18: # %bb46 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_18 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_19: # %bb59 +; CHECK-NEXT: .LBB0_19: # %bb49 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_19 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_20: # %bb46 +; CHECK-NEXT: .LBB0_20: # %bb57 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_20 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_21: # %bb49 +; CHECK-NEXT: .LBB0_21: # %bb18 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_21 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_22: # %bb57 +; CHECK-NEXT: .LBB0_22: # %bb58 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_22 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_23: # %bb56 +; CHECK-NEXT: .LBB0_23: # %bb23 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_23 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_24: # %bb20 +; CHECK-NEXT: .LBB0_24: # %bb60 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_24 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_25: # %bb18 +; CHECK-NEXT: .LBB0_25: # %bb55 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_25 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_26: # %bb61 +; CHECK-NEXT: .LBB0_26: # %bb62 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_26 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_27: # %bb55 +; CHECK-NEXT: .LBB0_27: # %bb56 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_27 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_28: # %bb62 +; CHECK-NEXT: .LBB0_28: # %bb20 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_28 ; CHECK-NEXT: .p2align 4 @@ -177,9 +176,9 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_30 ; CHECK-NEXT: .LBB0_31: # %bb9 -; CHECK-NEXT: ld r30, 64(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 56(r1) # 8-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r30, 48(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 40(r1) # 8-byte 
Folded Reload +; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: lwz r12, 8(r1) ; CHECK-NEXT: mtlr r0 @@ -188,29 +187,26 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-NEXT: mtocrf 8, r12 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_32: # %bb29 -; CHECK-NEXT: lwz r4, 52(r1) -; CHECK-NEXT: cmpwi cr4, r3, 0 -; CHECK-NEXT: setnbc r30, 4*cr2+eq -; CHECK-NEXT: # implicit-def: $cr2lt -; CHECK-NEXT: mfocrf r3, 32 +; CHECK-NEXT: mcrf cr0, cr4 ; CHECK-NEXT: cmpwi cr3, r5, 366 +; CHECK-NEXT: cmpwi cr4, r3, 0 ; CHECK-NEXT: li r29, 0 -; CHECK-NEXT: rlwimi r3, r4, 24, 8, 8 -; CHECK-NEXT: mtocrf 32, r3 +; CHECK-NEXT: setnbc r30, eq +; CHECK-NEXT: bc 12, 4*cr2+lt, .LBB0_36 ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_33: # %bb32 -; CHECK-NEXT: # -; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB0_35 -; CHECK-NEXT: # %bb.34: # %bb33 -; CHECK-NEXT: # -; CHECK-NEXT: stb r29, 0(r30) -; CHECK-NEXT: .LBB0_35: # %bb36 -; CHECK-NEXT: # -; CHECK-NEXT: bc 4, 4*cr4+eq, .LBB0_33 -; CHECK-NEXT: # %bb.36: # %bb39 -; CHECK-NEXT: # +; CHECK-NEXT: .LBB0_33: # %bb36 +; CHECK-NEXT: bc 12, 4*cr4+eq, .LBB0_35 +; CHECK-NEXT: .LBB0_34: # %bb32 +; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-NEXT: b .LBB0_36 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_35: # %bb39 ; CHECK-NEXT: bl call_2@notoc -; CHECK-NEXT: b .LBB0_33 +; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-NEXT: .LBB0_36: # %bb33 +; CHECK-NEXT: stb r29, 0(r30) +; CHECK-NEXT: bc 4, 4*cr4+eq, .LBB0_34 +; CHECK-NEXT: b .LBB0_35 ; ; CHECK-BE-LABEL: P10_Spill_CR_GT: ; CHECK-BE: # %bb.0: # %bb @@ -218,8 +214,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-BE-NEXT: mfcr r12 ; CHECK-BE-NEXT: std r0, 16(r1) ; CHECK-BE-NEXT: stw r12, 8(r1) -; CHECK-BE-NEXT: stdu r1, -160(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_offset 160 +; CHECK-BE-NEXT: stdu r1, -144(r1) +; CHECK-BE-NEXT: .cfi_def_cfa_offset 144 ; CHECK-BE-NEXT: .cfi_offset lr, 16 ; CHECK-BE-NEXT: .cfi_offset r29, -24 ; CHECK-BE-NEXT: .cfi_offset r30, -16 @@ -227,34 +223,33 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-BE-NEXT: .cfi_offset cr2, 8 ; CHECK-BE-NEXT: .cfi_offset cr2, 8 ; CHECK-BE-NEXT: lwz r3, 0(r3) -; CHECK-BE-NEXT: std r29, 136(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: std r30, 144(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: std r29, 120(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: srwi r4, r3, 4 ; CHECK-BE-NEXT: srwi r3, r3, 5 ; CHECK-BE-NEXT: andi. r4, r4, 1 ; CHECK-BE-NEXT: li r4, 0 -; CHECK-BE-NEXT: crmove 4*cr4+lt, gt +; CHECK-BE-NEXT: crmove 4*cr2+gt, gt ; CHECK-BE-NEXT: andi. 
r3, r3, 1 -; CHECK-BE-NEXT: setnbc r3, gt -; CHECK-BE-NEXT: stw r3, 132(r1) +; CHECK-BE-NEXT: crmove 4*cr2+lt, gt ; CHECK-BE-NEXT: cmplwi cr3, r3, 336 ; CHECK-BE-NEXT: li r3, 0 ; CHECK-BE-NEXT: sldi r30, r3, 2 ; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha ; CHECK-BE-NEXT: ld r29, .LC0@toc@l(r3) ; CHECK-BE-NEXT: b .LBB0_2 -; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB0_1: # %bb43 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: bl call_1 ; CHECK-BE-NEXT: nop ; CHECK-BE-NEXT: li r4, 0 -; CHECK-BE-NEXT: setnbc r3, 4*cr2+eq +; CHECK-BE-NEXT: setnbc r3, 4*cr4+eq ; CHECK-BE-NEXT: stb r4, 0(r3) ; CHECK-BE-NEXT: li r4, 0 +; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB0_2: # %bb5 ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: bc 12, 4*cr4+lt, .LBB0_31 +; CHECK-BE-NEXT: bc 12, 4*cr2+gt, .LBB0_31 ; CHECK-BE-NEXT: # %bb.3: # %bb10 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: bgt cr3, .LBB0_5 @@ -263,7 +258,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-BE-NEXT: mr r3, r4 ; CHECK-BE-NEXT: lwz r5, 0(r3) ; CHECK-BE-NEXT: rlwinm r4, r5, 0, 21, 22 -; CHECK-BE-NEXT: cmpwi cr2, r4, 512 +; CHECK-BE-NEXT: cmpwi cr4, r4, 512 ; CHECK-BE-NEXT: lwax r4, r30, r29 ; CHECK-BE-NEXT: add r4, r4, r29 ; CHECK-BE-NEXT: mtctr r4 @@ -302,11 +297,11 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_12 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_13: # %bb47 +; CHECK-BE-NEXT: .LBB0_13: # %bb61 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_13 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_14: # %bb58 +; CHECK-BE-NEXT: .LBB0_14: # %bb47 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_14 ; CHECK-BE-NEXT: .p2align 4 @@ -318,51 +313,51 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_16 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_17: # %bb23 +; CHECK-BE-NEXT: .LBB0_17: # %bb59 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_17 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_18: # %bb60 +; CHECK-BE-NEXT: .LBB0_18: # %bb46 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_18 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_19: # %bb59 +; CHECK-BE-NEXT: .LBB0_19: # %bb49 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_19 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_20: # %bb46 +; CHECK-BE-NEXT: .LBB0_20: # %bb57 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_20 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_21: # %bb49 +; CHECK-BE-NEXT: .LBB0_21: # %bb18 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_21 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_22: # %bb57 +; CHECK-BE-NEXT: .LBB0_22: # %bb58 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_22 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_23: # %bb56 +; CHECK-BE-NEXT: .LBB0_23: # %bb23 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_23 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_24: # %bb20 +; CHECK-BE-NEXT: .LBB0_24: # %bb60 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_24 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_25: # %bb18 +; CHECK-BE-NEXT: .LBB0_25: # %bb55 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_25 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_26: # %bb61 +; CHECK-BE-NEXT: .LBB0_26: # %bb62 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_26 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_27: # %bb55 +; CHECK-BE-NEXT: .LBB0_27: # %bb56 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_27 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_28: # %bb62 +; CHECK-BE-NEXT: .LBB0_28: # %bb20 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: 
b .LBB0_28 ; CHECK-BE-NEXT: .p2align 4 @@ -374,9 +369,9 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_30 ; CHECK-BE-NEXT: .LBB0_31: # %bb9 -; CHECK-BE-NEXT: ld r30, 144(r1) # 8-byte Folded Reload -; CHECK-BE-NEXT: ld r29, 136(r1) # 8-byte Folded Reload -; CHECK-BE-NEXT: addi r1, r1, 160 +; CHECK-BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; CHECK-BE-NEXT: ld r29, 120(r1) # 8-byte Folded Reload +; CHECK-BE-NEXT: addi r1, r1, 144 ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: lwz r12, 8(r1) ; CHECK-BE-NEXT: mtlr r0 @@ -385,30 +380,27 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr { ; CHECK-BE-NEXT: mtocrf 8, r12 ; CHECK-BE-NEXT: blr ; CHECK-BE-NEXT: .LBB0_32: # %bb29 -; CHECK-BE-NEXT: lwz r4, 132(r1) -; CHECK-BE-NEXT: cmpwi cr4, r3, 0 -; CHECK-BE-NEXT: setnbc r30, 4*cr2+eq -; CHECK-BE-NEXT: # implicit-def: $cr2lt -; CHECK-BE-NEXT: mfocrf r3, 32 +; CHECK-BE-NEXT: mcrf cr0, cr4 ; CHECK-BE-NEXT: cmpwi cr3, r5, 366 +; CHECK-BE-NEXT: cmpwi cr4, r3, 0 ; CHECK-BE-NEXT: li r29, 0 -; CHECK-BE-NEXT: rlwimi r3, r4, 24, 8, 8 -; CHECK-BE-NEXT: mtocrf 32, r3 -; CHECK-BE-NEXT: .p2align 5 -; CHECK-BE-NEXT: .LBB0_33: # %bb32 -; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: bc 4, 4*cr2+lt, .LBB0_35 -; CHECK-BE-NEXT: # %bb.34: # %bb33 -; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stb r29, 0(r30) -; CHECK-BE-NEXT: .LBB0_35: # %bb36 -; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: bc 4, 4*cr4+eq, .LBB0_33 -; CHECK-BE-NEXT: # %bb.36: # %bb39 -; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: setnbc r30, eq +; CHECK-BE-NEXT: bc 12, 4*cr2+lt, .LBB0_36 +; CHECK-BE-NEXT: .p2align 4 +; CHECK-BE-NEXT: .LBB0_33: # %bb36 +; CHECK-BE-NEXT: bc 12, 4*cr4+eq, .LBB0_35 +; CHECK-BE-NEXT: .LBB0_34: # %bb32 +; CHECK-BE-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-BE-NEXT: b .LBB0_36 +; CHECK-BE-NEXT: .p2align 4 +; CHECK-BE-NEXT: .LBB0_35: # %bb39 ; CHECK-BE-NEXT: bl call_2 ; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: b .LBB0_33 +; CHECK-BE-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-BE-NEXT: .LBB0_36: # %bb33 +; CHECK-BE-NEXT: stb r29, 0(r30) +; CHECK-BE-NEXT: bc 4, 4*cr4+eq, .LBB0_34 +; CHECK-BE-NEXT: b .LBB0_35 bb: %tmp = load i32, i32* undef, align 8 %tmp1 = and i32 %tmp, 16 diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll index a859121bb505c..883d26b669088 100644 --- a/llvm/test/CodeGen/PowerPC/pr36292.ll +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll @@ -15,7 +15,8 @@ define void @test() nounwind comdat { ; CHECK-NEXT: ld 29, 0(3) ; CHECK-NEXT: ld 30, 32(1) ; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: bge 0, .LBB0_2 +; CHECK-NEXT: bge- 0, .LBB0_2 +; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_1: # %bounds.ok ; CHECK-NEXT: # ; CHECK-NEXT: lfsx 2, 0, 3 @@ -25,7 +26,7 @@ define void @test() nounwind comdat { ; CHECK-NEXT: addi 30, 30, 1 ; CHECK-NEXT: stfsx 1, 0, 3 ; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: blt 0, .LBB0_1 +; CHECK-NEXT: blt+ 0, .LBB0_1 ; CHECK-NEXT: .LBB0_2: # %bounds.fail ; CHECK-NEXT: std 30, 32(1) %pos = alloca i64, align 8 diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll index 6605a1fd78cc4..be4e3908944aa 100644 --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -44,6 +44,7 @@ define void @print_res() nounwind { ; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_4 +; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: clrldi 10, 8, 32 diff --git a/llvm/test/CodeGen/SPARC/missinglabel.ll 
b/llvm/test/CodeGen/SPARC/missinglabel.ll index 60a3641ccea0f..792af8eec3bdf 100644 --- a/llvm/test/CodeGen/SPARC/missinglabel.ll +++ b/llvm/test/CodeGen/SPARC/missinglabel.ll @@ -13,13 +13,13 @@ define void @f(i64 %a0) align 2 { ; CHECK-NEXT: nop ; CHECK-NEXT: ba .LBB0_1 ; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_1: ! %cond.false ; CHECK-NEXT: .LBB0_2: ! %targetblock ; CHECK-NEXT: mov %g0, %o0 ; CHECK-NEXT: cmp %o0, 0 ; CHECK-NEXT: bne .LBB0_4 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.3: ! %cond.false.i83 -; CHECK-NEXT: .LBB0_1: ! %cond.false ; CHECK-NEXT: .LBB0_4: ! %exit.i85 entry: %cmp = icmp eq i64 %a0, 0 diff --git a/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir b/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir index f61ca33007349..37a1bd776fd84 100644 --- a/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir +++ b/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir @@ -4,9 +4,9 @@ # RUN: llc %s -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=codegenprepare \ # RUN: -stop-after codegenprepare -o - | FileCheck %s # -# CHECK-LABEL: bb2: +# CHECK-LABEL: bb1: # CHECK: ret -# CHECK-LABEL: bb4: +# CHECK-LABEL: bb2: # CHECK: ret diff --git a/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll b/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll index 0f345642d4106..af941da577cc2 100644 --- a/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll +++ b/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll @@ -43,10 +43,10 @@ unreachable: ; CHECK: br_if 0 ; CHECK: block ; CHECK: block -; CHECK: br_table {1, 1, 1, 1, 1, 1, 1, 0} +; CHECK: br_table {1, 1, 0} ; CHECK: .LBB1_2 ; CHECK: end_block -; CHECK: br_table {0, 0, 0} +; CHECK: br_table {0, 0, 0, 0, 0, 0, 0, 0} ; CHECK: .LBB1_3 ; CHECK: end_block ; CHECK: unreachable diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll index 5454d7ef1bb4c..2ba3cf23774a6 100644 --- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll @@ -47,7 +47,6 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia ; CHECK-NEXT: movb $1, %bh ; CHECK-NEXT: movl $274877907, %ebp ## imm = 0x10624DD3 ; CHECK-NEXT: jmp LBB0_5 -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_23: ## %bb7806 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp16: @@ -153,15 +152,6 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia ; CHECK-NEXT: calll __ZN12wxStringBase10ConcatSelfEmPKwm ; CHECK-NEXT: Ltmp11: ; CHECK-NEXT: jmp LBB0_5 -; CHECK-NEXT: LBB0_22: ## %bb5968 -; CHECK-NEXT: Ltmp2: -; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl $0, (%esp) -; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz -; CHECK-NEXT: subl $4, %esp -; CHECK-NEXT: Ltmp3: -; CHECK-NEXT: jmp LBB0_27 ; CHECK-NEXT: LBB0_9: ## %bb5657 ; CHECK-NEXT: Ltmp13: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -170,6 +160,15 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia ; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE ; CHECK-NEXT: Ltmp14: +; CHECK-NEXT: jmp LBB0_27 +; CHECK-NEXT: LBB0_22: ## %bb5968 +; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: Ltmp3: ; CHECK-NEXT: LBB0_27: ## %bb115.critedge.i ; CHECK-NEXT: movl %esi, %eax ; 
CHECK-NEXT: addl $28, %esp diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index 258cc2031ae8b..acc4b7e138118 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { ; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin ; CHECK: %loop.inner2.begin +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin ; CHECK: %bail entry: diff --git a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll index 7231ea35b7856..93e52ded59aff 100644 --- a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll +++ b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll @@ -27,10 +27,10 @@ define void @g() #0 { ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: lock cmpxchg8b (%esi) ; CHECK-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: # %bb.2: # %k.end -; CHECK-NEXT: .LBB0_1: # %. +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %. ; CHECK-NEXT: calll m +; CHECK-NEXT: .LBB0_2: # %k.end entry: %p = load i8*, i8** @f %v1 = load atomic i8, i8* %p monotonic, align 1 diff --git a/llvm/test/CodeGen/X86/pr27501.ll b/llvm/test/CodeGen/X86/pr27501.ll index bde41214471db..e5da4ddde239e 100644 --- a/llvm/test/CodeGen/X86/pr27501.ll +++ b/llvm/test/CodeGen/X86/pr27501.ll @@ -6,15 +6,15 @@ define void @test1(i64* %result.repack) personality i32 (...)* @__CxxFrameHandle bb: invoke void @may_throw(i32 1) to label %postinvoke unwind label %cleanuppad +; CHECK: movq %rcx, [[SpillLoc:.*\(%rbp\)]] ; CHECK: movl $1, %ecx ; CHECK: callq may_throw postinvoke: ; preds = %bb store i64 19, i64* %result.repack, align 8 - -; CHECK: movq $19, (%rsi) +; CHECK: movq [[SpillLoc]], [[R1:%r..]] +; CHECK: movq $19, ([[R1]]) ; CHECK: movl $2, %ecx -; CHECK-NEXT: movq %rsi, -8(%rbp) ; CHECK-NEXT: callq may_throw invoke void @may_throw(i32 2) to label %assertFailed unwind label %catch.dispatch @@ -38,8 +38,8 @@ try.success.or.caught: ; preds = %catchhandler postinvoke27: ; preds = %try.success.or.caught store i64 42, i64* %result.repack, align 8 -; CHECK: movq -8(%rbp), %[[reload:r..]] -; CHECK-NEXT: movq $42, (%[[reload]]) +; CHECK: movq [[SpillLoc]], [[R2:%r..]] +; CHECK-NEXT: movq $42, ([[R2]]) ret void cleanuppad24: ; preds = %try.success.or.caught diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll index 84b8c08f9ea4c..270266b9bddbf 100644 --- a/llvm/test/CodeGen/X86/pr37916.ll +++ b/llvm/test/CodeGen/X86/pr37916.ll @@ -7,7 +7,7 @@ define void @fn1() local_unnamed_addr { ; CHECK-LABEL: fn1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB0_1: # %if.end +; CHECK: .LBB0_1: # %if.end ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl a+4, %eax ; CHECK-NEXT: orl a, %eax diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index 670477c4c2851..69dada8a884e1 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: movq %rdi, %rbp ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq 
{{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -80,8 +80,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_8: ## %while.body.preheader ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx -; CHECK-NEXT: leaq 8(%rcx,%rax), %rax -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: leaq 8(%rcx,%rax), %rdx ; CHECK-NEXT: movl $1, %r15d ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl @@ -92,70 +91,71 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: testb %bpl, %bpl +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: testb %r13b, %r13b ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: leaq {{.*}}(%rip), %r13 -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: xorl %r12d, %r12d -; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: leaq {{.*}}(%rip), %rbx +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: decl %r15d ; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r12d, %ebp +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%rbp), %eax +; CHECK-NEXT: leal -268(%r13), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%r13,%rax,4), %rax -; CHECK-NEXT: addq %r13, %rax +; CHECK-NEXT: movslq (%rbx,%rax,4), %rax +; CHECK-NEXT: addq %rbx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%rbp), %eax +; CHECK-NEXT: leal 1(%r13), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r12d -; CHECK-NEXT: leaq {{.*}}(%rip), %rcx -; CHECK-NEXT: movslq (%rcx,%rax,4), %rax -; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: movl $-1, %r14d +; CHECK-NEXT: movslq (%rdx,%rax,4), %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r12d +; CHECK-NEXT: movl $1, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: 
LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $rbp ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $rbp ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -164,8 +164,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: leaq 1(%r14), %rax -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: leaq 1(%rbp), %rax +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: je LBB0_33 ; CHECK-NEXT: LBB0_29: ## %land.rhs485 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 @@ -174,14 +174,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: js LBB0_55 ; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movq %rax, %rbp +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: jmp LBB0_34 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 @@ -192,22 +192,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C +; CHECK-NEXT: movl $268, %r14d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r12d +; CHECK-NEXT: movl $20, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r12d +; CHECK-NEXT: movl $2, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r14 +; CHECK-NEXT: incq %rbp ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r12), %eax +; CHECK-NEXT: leal -324(%r14), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -217,11 +217,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r12d +; CHECK-NEXT: cmpl $11, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r12d +; CHECK-NEXT: cmpl $24, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.37: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -231,14 +231,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_38: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; 
CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_38 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movb $0, (%r14) -; CHECK-NEXT: movl %ebp, %r12d -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movb $0, (%rbp) +; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 @@ -254,30 +254,32 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: ; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r12d -; CHECK-NEXT: cmpl $16, %r12d +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl $16, %r14d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r12d, %eax +; CHECK-NEXT: btl %r14d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: -; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload ; CHECK-NEXT: LBB0_48: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write -; CHECK-NEXT: subq %rbx, %r14 +; CHECK-NEXT: subq %rbp, %rbx ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax -; CHECK-NEXT: leaq 8189(%r14,%rax), %rax +; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_49: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: decq %rax ; CHECK-NEXT: jmp LBB0_49 ; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit -; CHECK-NEXT: movq %r14, %rbx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +; CHECK-NEXT: movq %rbx, %rbp ; CHECK-NEXT: jmp LBB0_48 ; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader ; CHECK-NEXT: xorl %eax, %eax @@ -298,17 +300,18 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## %bb.51: ## %for.body1664.lr.ph ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload ; CHECK-NEXT: jne LBB0_54 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader -; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_53: ## %while.body1679 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload -; CHECK-NEXT: movq (%rax), %rdi +; CHECK-NEXT: movq (%rbx), %rdi ; CHECK-NEXT: callq _fileno -; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload -; CHECK-NEXT: leal 1(%rax), %ecx -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movslq %ebp, %rax +; CHECK-NEXT: leal 1(%rax), %ebp ; CHECK-NEXT: cmpq %rax, %rax ; CHECK-NEXT: jl LBB0_53 ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-3.ll b/llvm/test/Transforms/JumpThreading/thread-prob-3.ll index a4a432b1a8329..27d060e159522 100644 --- a/llvm/test/Transforms/JumpThreading/thread-prob-3.ll +++ b/llvm/test/Transforms/JumpThreading/thread-prob-3.ll @@ -5,8 +5,8 @@ ; call 
DuplicateCondBranchOnPHIIntoPred(bb3, {bb2}). ; ; CHECK-LABEL: ---- Branch Probability Info : foo -; CHECK: set edge bb2 -> 0 successor probability to 0x7fffffff / 0x80000000 = 100.00% -; CHECK-NEXT: set edge bb2 -> 1 successor probability to 0x00000001 / 0x80000000 = 0.00% +; CHECK: set edge bb2 -> 0 successor probability to 0x80000000 / 0x80000000 = 100.00% +; CHECK-NEXT: set edge bb2 -> 1 successor probability to 0x00000000 / 0x80000000 = 0.00% define void @foo(i1 %f0, i1 %f1, i1 %f2) !prof !{!"function_entry_count", i64 0} { ; CHECK-LABEL: @foo( bb1: From e122a71a0a284e669c970e80214c6b3082aa2534 Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Wed, 16 Dec 2020 09:55:16 -0500 Subject: [PATCH 177/378] [TableGen] Add the !substr() bang operator Update the documentation and add a test. Build failed: Change SIZE_MAX to std::numeric_limits::max(). Differential Revision: https://reviews.llvm.org/D93419 --- llvm/docs/TableGen/ProgRef.rst | 10 ++- llvm/include/llvm/TableGen/Record.h | 2 +- llvm/lib/TableGen/Record.cpp | 28 ++++++++- llvm/lib/TableGen/TGLexer.cpp | 1 + llvm/lib/TableGen/TGLexer.h | 6 +- llvm/lib/TableGen/TGParser.cpp | 95 ++++++++++++++++++++++++++++- llvm/lib/TableGen/TGParser.h | 1 + llvm/test/TableGen/substr.td | 81 ++++++++++++++++++++++++ 8 files changed, 215 insertions(+), 9 deletions(-) create mode 100644 llvm/test/TableGen/substr.td diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index 342b91a0c437b..f2ee7a7e549a8 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -216,7 +216,8 @@ TableGen provides "bang operators" that have a wide variety of uses: : !interleave !isa !le !listconcat !listsplat : !lt !mul !ne !not !or : !setdagop !shl !size !sra !srl - : !strconcat !sub !subst !tail !xor + : !strconcat !sub !subst !substr !tail + : !xor The ``!cond`` operator has a slightly different syntax compared to other bang operators, so it is defined separately: @@ -1723,6 +1724,13 @@ and non-0 as true. record if the *target* record name equals the *value* record name; otherwise it produces the *value*. +``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)`` + This operator extracts a substring of the given *string*. The starting + position of the substring is specified by *start*, which can range + between 0 and the length of the string. The length of the substring + is specified by *length*; if not specified, the rest of the string is + extracted. The *start* and *length* arguments must be integers. + ``!tail(``\ *a*\ ``)`` This operator produces a new list with all the elements of the list *a* except for the zeroth one. (See also ``!head``.) diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 3010b4dad09a8..a0c5b2778547d 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -829,7 +829,7 @@ class BinOpInit : public OpInit, public FoldingSetNode { /// !op (X, Y, Z) - Combine two inits. 
class TernOpInit : public OpInit, public FoldingSetNode { public: - enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG }; + enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR }; private: Init *LHS, *MHS, *RHS; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index cbdce04494f37..9c0464d4e1bf6 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -1325,6 +1325,27 @@ Init *TernOpInit::Fold(Record *CurRec) const { } break; } + + case SUBSTR: { + StringInit *LHSs = dyn_cast(LHS); + IntInit *MHSi = dyn_cast(MHS); + IntInit *RHSi = dyn_cast(RHS); + if (LHSs && MHSi && RHSi) { + int64_t StringSize = LHSs->getValue().size(); + int64_t Start = MHSi->getValue(); + int64_t Length = RHSi->getValue(); + if (Start < 0 || Start > StringSize) + PrintError(CurRec->getLoc(), + Twine("!substr start position is out of range 0...") + + std::to_string(StringSize) + ": " + + std::to_string(Start)); + if (Length < 0) + PrintError(CurRec->getLoc(), "!substr length must be nonnegative"); + return StringInit::get(LHSs->getValue().substr(Start, Length), + LHSs->getFormat()); + } + break; + } } return const_cast(this); @@ -1364,11 +1385,12 @@ std::string TernOpInit::getAsString() const { std::string Result; bool UnquotedLHS = false; switch (getOpcode()) { - case SUBST: Result = "!subst"; break; - case FOREACH: Result = "!foreach"; UnquotedLHS = true; break; + case DAG: Result = "!dag"; break; case FILTER: Result = "!filter"; UnquotedLHS = true; break; + case FOREACH: Result = "!foreach"; UnquotedLHS = true; break; case IF: Result = "!if"; break; - case DAG: Result = "!dag"; break; + case SUBST: Result = "!subst"; break; + case SUBSTR: Result = "!substr"; break; } return (Result + "(" + (UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) + diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index df0df96f40eb7..a45ef6dc10c16 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -589,6 +589,7 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("listsplat", tgtok::XListSplat) .Case("strconcat", tgtok::XStrConcat) .Case("interleave", tgtok::XInterleave) + .Case("substr", tgtok::XSubstr) .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated. .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated. .Default(tgtok::Error); diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 1856bef3ea9bd..ee568849ca887 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -53,9 +53,9 @@ namespace tgtok { // Bang operators. XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL, - XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach, - XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, - XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, + XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast, + XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, + XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, // Boolean literals. 
TrueVal, FalseVal, diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 2671d29a72721..ebb66ccffc29a 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -25,6 +25,7 @@ #include #include #include +#include using namespace llvm; @@ -1496,6 +1497,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec); } + case tgtok::XSubstr: + return ParseOperationSubstr(CurRec, ItemType); + case tgtok::XCond: return ParseOperationCond(CurRec, ItemType); @@ -1655,6 +1659,94 @@ RecTy *TGParser::ParseOperatorType() { return Type; } +/// Parse the !substr operation. Return null on error. +/// +/// Substr ::= !substr(string, start-int [, length-int]) => string +Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) { + TernOpInit::TernaryOp Code = TernOpInit::SUBSTR; + RecTy *Type = StringRecTy::get(); + + Lex.Lex(); // eat the operation + + if (!consume(tgtok::l_paren)) { + TokError("expected '(' after !substr operator"); + return nullptr; + } + + Init *LHS = ParseValue(CurRec); + if (!LHS) + return nullptr; + + if (!consume(tgtok::comma)) { + TokError("expected ',' in !substr operator"); + return nullptr; + } + + SMLoc MHSLoc = Lex.getLoc(); + Init *MHS = ParseValue(CurRec); + if (!MHS) + return nullptr; + + SMLoc RHSLoc = Lex.getLoc(); + Init *RHS; + if (consume(tgtok::comma)) { + RHSLoc = Lex.getLoc(); + RHS = ParseValue(CurRec); + if (!RHS) + return nullptr; + } else { + RHS = IntInit::get(std::numeric_limits::max()); + } + + if (!consume(tgtok::r_paren)) { + TokError("expected ')' in !substr operator"); + return nullptr; + } + + if (ItemType && !Type->typeIsConvertibleTo(ItemType)) { + Error(RHSLoc, Twine("expected value of type '") + + ItemType->getAsString() + "', got '" + + Type->getAsString() + "'"); + } + + TypedInit *LHSt = dyn_cast(LHS); + if (!LHSt && !isa(LHS)) { + TokError("could not determine type of the string in !substr"); + return nullptr; + } + if (LHSt && !isa(LHSt->getType())) { + TokError(Twine("expected string, got type '") + + LHSt->getType()->getAsString() + "'"); + return nullptr; + } + + TypedInit *MHSt = dyn_cast(MHS); + if (!MHSt && !isa(MHS)) { + TokError("could not determine type of the start position in !substr"); + return nullptr; + } + if (MHSt && !isa(MHSt->getType())) { + Error(MHSLoc, Twine("expected int, got type '") + + MHSt->getType()->getAsString() + "'"); + return nullptr; + } + + if (RHS) { + TypedInit *RHSt = dyn_cast(RHS); + if (!RHSt && !isa(RHS)) { + TokError("could not determine type of the length in !substr"); + return nullptr; + } + if (RHSt && !isa(RHSt->getType())) { + TokError(Twine("expected int, got type '") + + RHSt->getType()->getAsString() + "'"); + return nullptr; + } + } + + return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec); +} + /// Parse the !foreach and !filter operations. Return null on error. 
/// /// ForEach ::= !foreach(ID, list-or-dag, expr) => list @@ -2206,7 +2298,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XFoldl: case tgtok::XForEach: case tgtok::XFilter: - case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' + case tgtok::XSubst: + case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' return ParseOperation(CurRec, ItemType); } } diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h index bdeb4d35382b2..3ed78a23067ff 100644 --- a/llvm/lib/TableGen/TGParser.h +++ b/llvm/lib/TableGen/TGParser.h @@ -254,6 +254,7 @@ class TGParser { TypedInit *FirstItem = nullptr); RecTy *ParseType(); Init *ParseOperation(Record *CurRec, RecTy *ItemType); + Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType); Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType); Init *ParseOperationCond(Record *CurRec, RecTy *ItemType); RecTy *ParseOperatorType(); diff --git a/llvm/test/TableGen/substr.td b/llvm/test/TableGen/substr.td new file mode 100644 index 0000000000000..5efe4ce69215e --- /dev/null +++ b/llvm/test/TableGen/substr.td @@ -0,0 +1,81 @@ +// RUN: llvm-tblgen %s | FileCheck %s +// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s + +defvar claim = "This is the end of the world!"; + +// CHECK: def Rec1 +// CHECK: fullNoLength = "This is the end of the world!"; +// CHECK: fullLength = "This is the end of the world!"; +// CHECK: thisIsTheEnd = "This is the end"; +// CHECK: DoorsSong = "the end"; +// CHECK: finalNoLength = "end of the world!"; +// CHECK: finalLength = "end of the world!"; + +def Rec1 { + string fullNoLength = !substr(claim, 0); + string fullLength = !substr(claim, 0, 999); + string thisIsTheEnd = !substr(claim, 0, 15); + string DoorsSong = !substr(claim, 8, 7); + string finalNoLength = !substr(claim, 12); + string finalLength = !substr(claim, 12, !sub(!size(claim), 12)); +} + +// CHECK: def Rec2 { +// CHECK: lastName = "Flintstone"; + +def Rec2 { + string firstName = "Fred"; + string name = firstName # " " # "Flintstone"; + string lastName = !substr(name, !add(!size(firstName), 1)); +} + +// CHECK: def Rec3 { +// CHECK: test1 = ""; +// CHECK: test2 = ""; +// CHECK: test3 = ""; +// CHECK: test4 = "h"; +// CHECK: test5 = "hello"; +// CHECK: test6 = ""; + +def Rec3 { + string test1 = !substr("", 0, 0); + string test2 = !substr("", 0, 9); + string test3 = !substr("hello", 0, 0); + string test4 = !substr("hello", 0, 1); + string test5 = !substr("hello", 0, 99); + string test6 = !substr("hello", 5, 99); +} + +// CHECK: def Rec4 +// CHECK: message = "This is the end of the world!"; +// CHECK: messagePrefix = "This is th..."; +// CHECK: warning = "Bad message: 'This is th...'"; + +class C { + string message = msg; + string messagePrefix = !substr(message, 0, 10) # "..."; +} + +def Rec4 : C { + string warning = "Bad message: '" # messagePrefix # "'"; +} + +#ifdef ERROR1 + +// ERROR1: expected string, got type 'int' +// ERROR1: expected int, got type 'bits<3>' +// ERROR1: expected int, got type 'string' +// ERROR1: !substr start position is out of range 0...29: 30 +// ERROR1: !substr length must be nonnegative + +def Rec8 { + string claim1 = !substr(42, 0, 3); + string claim2 = !substr(claim, 0b101); + string claim3 = !substr(claim, 0, "oops"); +} + +def Rec9 { + string claim1 = !substr(claim, !add(!size(claim), 1)); + string claim2 = !substr(claim, 0, -13); +} +#endif From 9d1140e18e6f662636ac715b7dca202a969e9845 Mon Sep 17 00:00:00 2001 
From: Jez Ng Date: Wed, 23 Dec 2020 11:24:02 -0500 Subject: [PATCH 178/378] [lld-macho] Simulator & DriverKit executables should always be PIE We didn't have support for parsing DriverKit in our `-platform` flag, so add that too. Also remove a bunch of unnecessary namespace prefixes. Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D93741 --- lld/MachO/Driver.cpp | 33 ++++++++++++++------------ lld/test/MachO/platform-version.s | 2 +- lld/test/MachO/x86-64-reloc-unsigned.s | 4 ++++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index edc9fe001ab5d..9780443fb85a2 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -548,20 +548,19 @@ static void handlePlatformVersion(const opt::Arg *arg) { // TODO(compnerd) see if we can generate this case list via XMACROS config->platform.kind = - llvm::StringSwitch(lowerDash(platformStr)) - .Cases("macos", "1", llvm::MachO::PlatformKind::macOS) - .Cases("ios", "2", llvm::MachO::PlatformKind::iOS) - .Cases("tvos", "3", llvm::MachO::PlatformKind::tvOS) - .Cases("watchos", "4", llvm::MachO::PlatformKind::watchOS) - .Cases("bridgeos", "5", llvm::MachO::PlatformKind::bridgeOS) - .Cases("mac-catalyst", "6", llvm::MachO::PlatformKind::macCatalyst) - .Cases("ios-simulator", "7", llvm::MachO::PlatformKind::iOSSimulator) - .Cases("tvos-simulator", "8", - llvm::MachO::PlatformKind::tvOSSimulator) - .Cases("watchos-simulator", "9", - llvm::MachO::PlatformKind::watchOSSimulator) - .Default(llvm::MachO::PlatformKind::unknown); - if (config->platform.kind == llvm::MachO::PlatformKind::unknown) + StringSwitch(lowerDash(platformStr)) + .Cases("macos", "1", PlatformKind::macOS) + .Cases("ios", "2", PlatformKind::iOS) + .Cases("tvos", "3", PlatformKind::tvOS) + .Cases("watchos", "4", PlatformKind::watchOS) + .Cases("bridgeos", "5", PlatformKind::bridgeOS) + .Cases("mac-catalyst", "6", PlatformKind::macCatalyst) + .Cases("ios-simulator", "7", PlatformKind::iOSSimulator) + .Cases("tvos-simulator", "8", PlatformKind::tvOSSimulator) + .Cases("watchos-simulator", "9", PlatformKind::watchOSSimulator) + .Cases("driverkit", "10", PlatformKind::driverKit) + .Default(PlatformKind::unknown); + if (config->platform.kind == PlatformKind::unknown) error(Twine("malformed platform: ") + platformStr); // TODO: check validity of version strings, which varies by platform // NOTE: ld64 accepts version strings with 5 components @@ -637,10 +636,14 @@ static bool isPie(opt::InputArgList &args) { // to PIE from 10.7, arm64 should always be PIE, etc assert(config->arch == AK_x86_64 || config->arch == AK_x86_64h); - if (config->platform.kind == MachO::PlatformKind::macOS && + PlatformKind kind = config->platform.kind; + if (kind == PlatformKind::macOS && config->platform.minimum >= VersionTuple(10, 6)) return true; + if (kind == PlatformKind::iOSSimulator || kind == PlatformKind::driverKit) + return true; + return args.hasArg(OPT_pie); } diff --git a/lld/test/MachO/platform-version.s b/lld/test/MachO/platform-version.s index 326a74428cf41..0bfc46930c5b2 100644 --- a/lld/test/MachO/platform-version.s +++ b/lld/test/MachO/platform-version.s @@ -55,7 +55,7 @@ # RUN: -platform_version 0 1 5 \ # RUN: | FileCheck --check-prefix=FAIL-PLATFORM %s # RUN: not %lld -o %t %t.o 2>&1 \ -# RUN: -platform_version 10 1 5 \ +# RUN: -platform_version 11 1 5 \ # RUN: | FileCheck --check-prefix=FAIL-PLATFORM %s # FAIL-PLATFORM: malformed platform: {{.*}} # FAIL-PLATFORM-NOT: malformed {{minimum|sdk}} version: {{.*}} 
diff --git a/lld/test/MachO/x86-64-reloc-unsigned.s b/lld/test/MachO/x86-64-reloc-unsigned.s index f1afc0cb70437..5fdbdf1f0627b 100644 --- a/lld/test/MachO/x86-64-reloc-unsigned.s +++ b/lld/test/MachO/x86-64-reloc-unsigned.s @@ -14,6 +14,10 @@ # RUN: llvm-objdump --macho --rebase %t-pie | FileCheck %s --check-prefix=PIE # RUN: %lld -platform_version macos 10.5.0 11.0 -o %t-no-pie %t.o # RUN: llvm-objdump --macho --rebase %t-no-pie | FileCheck %s --check-prefix=NO-PIE +# RUN: %lld -platform_version ios-simulator 11.0.0 14.2 -o %t-pie %t.o +# RUN: llvm-objdump --macho --rebase %t-pie | FileCheck %s --check-prefix=PIE +# RUN: %lld -platform_version driverkit 19.0 20.0 -o %t-pie %t.o +# RUN: llvm-objdump --macho --rebase %t-pie | FileCheck %s --check-prefix=PIE # CHECK: Contents of section __DATA,foo: # CHECK-NEXT: 100001000 08100000 01000000 From 631501b1f90e8a90faeadbd535a557633a5af71b Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 23 Dec 2020 11:45:46 -0500 Subject: [PATCH 179/378] [OpenMP] Fixing typo on memory size in Documenation --- openmp/docs/design/Runtimes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 2e5f2bfe03844..c9f3a55c00679 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -60,7 +60,7 @@ LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD ``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` sets the threshold size for which the ``libomptarget`` memory manager will handle the allocation. Any allocations larger than this threshold will not use the memory manager and be freed after -the device kernel exits The default threshold value is ``8Kb``. If +the device kernel exits. The default threshold value is ``8KB``. If ``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` is set to ``0`` the memory manager will be completely disabled. From 7ad666798f12456d9e663e763e17e29007c3728d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 23 Dec 2020 12:39:16 -0500 Subject: [PATCH 180/378] Revert 741978d727 and things that landed on top of it. 741978d727 made clang produce output that's 2x as large at least in sanitizer builds. https://reviews.llvm.org/D83892#2470185 has a standalone repro. This reverts the following commits: Revert "[clang][cli] Port CodeGenOpts simple string flags to new option parsing system" This reverts commit 95d3cc67caac04668ef808f65c30ced60ed14f5d. Revert "[clang][cli] Port LangOpts simple string based options to new option parsing system" This reverts commit aec2991d083a9c5b92f94d84a7b3a7bbed405af8. Revert "[clang][cli] Streamline MarshallingInfoFlag description" This reverts commit 27b7d646886d499c70dec3481dfc3c82dfc43dd7. Revert "[clang][cli] Port LangOpts option flags to new option parsing system" This reverts commit 383778e2171b4993f555433745466e211e713548. Revert "[clang][cli] Port CodeGen option flags to new option parsing system" This reverts commit 741978d727a445fa279d5952a86ea634adb7dc52. 
--- clang/include/clang/Basic/CodeGenOptions.h | 2 - .../clang/Basic/DiagnosticDriverKinds.td | 2 + .../clang/Basic/DiagnosticFrontendKinds.td | 2 + clang/include/clang/Driver/Options.td | 1357 +++++++---------- clang/lib/Frontend/CompilerInvocation.cpp | 806 +++++++++- clang/test/Profile/c-generate.c | 2 +- llvm/include/llvm/Option/OptParser.td | 9 +- 7 files changed, 1295 insertions(+), 885 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index ef4fa31256cd4..5c540812ed312 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -30,8 +30,6 @@ namespace clang { /// Bitfields of CodeGenOptions, split out from CodeGenOptions to ensure /// that this large collection of bitfields is a trivial class type. class CodeGenOptionsBase { - friend class CompilerInvocation; - public: #define CODEGENOPT(Name, Bits, Default) unsigned Name : Bits; #define ENUM_CODEGENOPT(Name, Type, Bits, Default) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index e92a4bf1dac56..892fb1c24b6d1 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -86,6 +86,8 @@ def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< "invalid linker name in argument '%0'">; +def err_drv_invalid_pgo_instrumentor : Error< + "invalid PGO instrumentor in argument '%0'">; def err_drv_invalid_rtlib_name : Error< "invalid runtime library name in argument '%0'">; def err_drv_unsupported_rtlib_for_platform : Error< diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index b9f8c78e43da8..def189f659947 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -108,6 +108,8 @@ def err_fe_action_not_available : Error< "action %0 not compiled in">; def err_fe_invalid_alignment : Error< "invalid value '%1' in '%0'; alignment must be a power of 2">; +def err_fe_invalid_wchar_type + : Error<"invalid wchar_t type '%0'; must be one of 'char', 'short', 'int'">; def err_fe_invalid_exception_model : Error<"invalid exception model '%select{none|dwarf|sjlj|arm|seh|wasm|aix}0' for target '%1'">; def warn_fe_concepts_ts_flag : Warning< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3373984b76ae6..7f63a5577262b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -696,7 +696,7 @@ def O_flag : Flag<["-"], "O">, Flags<[CC1Option]>, Alias, AliasArgs<["1"]>; def Ofast : Joined<["-"], "Ofast">, Group, Flags<[CC1Option]>; def P : Flag<["-"], "P">, Flags<[CC1Option]>, Group, HelpText<"Disable linemarker output in -E mode">, - MarshallingInfoNegativeFlag<"PreprocessorOutputOpts.ShowLineMarkers">; + MarshallingInfoFlag<"PreprocessorOutputOpts.ShowLineMarkers", "true">, IsNegative; def Qy : Flag<["-"], "Qy">, Flags<[CC1Option]>, HelpText<"Emit metadata containing compiler name and version">; def Qn : Flag<["-"], "Qn">, Flags<[CC1Option]>, @@ -910,10 +910,8 @@ def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero"> def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">; defm cuda_approx_transcendentals : OptInFFlag<"cuda-approx-transcendentals", 
"Use", "Don't use", " approximate transcendental functions">; -defm gpu_rdc : BoolFOption<"gpu-rdc", - "LangOpts->GPURelocatableDeviceCode", DefaultsToFalse, - ChangedBy, - ResetBy>; +defm gpu_rdc : OptInFFlag<"gpu-rdc", + "Generate relocatable device code, also known as separate compilation mode", "", "">; def : Flag<["-"], "fcuda-rdc">, Alias; def : Flag<["-"], "fno-cuda-rdc">, Alias; defm cuda_short_ptr : OptInFFlag<"cuda-short-ptr", @@ -930,21 +928,16 @@ def hip_version_EQ : Joined<["--"], "hip-version=">, HelpText<"HIP version in the format of major.minor.patch">; def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">, Group, Flags<[NoArgumentUnused, HelpHidden]>; -defm hip_new_launch_api : BoolFOption<"hip-new-launch-api", - "LangOpts->HIPUseNewLaunchAPI", DefaultsToFalse, - ChangedBy, ResetBy, - BothFlags<[], " new kernel launching API for HIP">>; +defm hip_new_launch_api : OptInFFlag<"hip-new-launch-api", + "Use", "Don't use", " new kernel launching API for HIP">; defm gpu_allow_device_init : OptInFFlag<"gpu-allow-device-init", "Allow", "Don't allow", " device side init function in HIP">; -defm gpu_defer_diag : BoolFOption<"gpu-defer-diag", - "LangOpts->GPUDeferDiag", DefaultsToFalse, - ChangedBy, ResetBy, - BothFlags<[], " host/device related diagnostic messages for CUDA/HIP">>; -defm gpu_exclude_wrong_side_overloads : BoolFOption<"gpu-exclude-wrong-side-overloads", - "LangOpts->GPUExcludeWrongSideOverloads", DefaultsToFalse, - ChangedBy, - ResetBy, - BothFlags<[HelpHidden], " in overloading resolution for CUDA/HIP">>; +defm gpu_defer_diag : OptInFFlag<"gpu-defer-diag", + "Defer", "Don't defer", " host/device related diagnostic messages" + " for CUDA/HIP">; +defm gpu_exclude_wrong_side_overloads : OptInFFlag<"gpu-exclude-wrong-side-overloads", + "Always exclude wrong side overloads", "Exclude wrong side overloads only if there are same side overloads", + " in overloading resolution for CUDA/HIP", [HelpHidden]>; def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">; @@ -996,31 +989,21 @@ def interface_stub_version_EQ : JoinedOrSeparate<["-"], "interface-stub-version= def exported__symbols__list : Separate<["-"], "exported_symbols_list">; def e : JoinedOrSeparate<["-"], "e">, Flags<[LinkerInput]>, Group; def fmax_tokens_EQ : Joined<["-"], "fmax-tokens=">, Group, Flags<[CC1Option]>, - HelpText<"Max total number of preprocessed tokens for -Wmax-tokens.">, - MarshallingInfoStringInt<"LangOpts->MaxTokens">; + HelpText<"Max total number of preprocessed tokens for -Wmax-tokens.">; def fPIC : Flag<["-"], "fPIC">, Group; def fno_PIC : Flag<["-"], "fno-PIC">, Group; def fPIE : Flag<["-"], "fPIE">, Group; def fno_PIE : Flag<["-"], "fno-PIE">, Group; -defm access_control : BoolFOption<"access-control", - "LangOpts->AccessControl", DefaultsToTrue, - ChangedBy, - ResetBy>; +defm access_control : OptOutFFlag<"access-control", "", "Disable C++ access control">; def falign_functions : Flag<["-"], "falign-functions">, Group; def falign_functions_EQ : Joined<["-"], "falign-functions=">, Group; def fno_align_functions: Flag<["-"], "fno-align-functions">, Group; -defm allow_editor_placeholders : BoolFOption<"allow-editor-placeholders", - "LangOpts->AllowEditorPlaceholders", DefaultsToFalse, - ChangedBy, - ResetBy>; +defm allow_editor_placeholders : OptInFFlag<"allow-editor-placeholders", "Treat editor placeholders as valid source code">; def 
fallow_unsupported : Flag<["-"], "fallow-unsupported">, Group; def fapple_kext : Flag<["-"], "fapple-kext">, Group, Flags<[CC1Option]>, - HelpText<"Use Apple's kernel extensions ABI">, - MarshallingInfoFlag<"LangOpts->AppleKext">; -defm apple_pragma_pack : BoolFOption<"apple-pragma-pack", - "LangOpts->ApplePragmaPack", DefaultsToFalse, - ChangedBy, - ResetBy>; + HelpText<"Use Apple's kernel extensions ABI">; +def fapple_pragma_pack : Flag<["-"], "fapple-pragma-pack">, Group, Flags<[CC1Option]>, + HelpText<"Enable Apple gcc-compatible #pragma pack handling">; def shared_libsan : Flag<["-"], "shared-libsan">, HelpText<"Dynamically link the sanitizer runtime">; def static_libsan : Flag<["-"], "static-libsan">, @@ -1053,19 +1036,14 @@ defm coroutines_ts : OptInFFlag<"coroutines-ts", "Enable support for the C++ Cor def fembed_bitcode_EQ : Joined<["-"], "fembed-bitcode=">, Group, Flags<[NoXarchOption, CC1Option, CC1AsOption]>, MetaVarName<"