diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 37118702762956..9d195fcb3ff78c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2682,6 +2682,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
   return I->second;
 }
 
+/// Knowing that loop \p L executes a single vector iteration, add instructions
+/// that will get simplified and thus should not have any cost to \p
+/// InstsToIgnore. This covers the latch compare of \p L (if it has one) and,
+/// for every induction in \p IL, the value flowing into the induction phi from
+/// the latch, provided that its only users are the phi itself and the latch
+/// compare.
+static void addFullyUnrolledInstructionsToIgnore(
+    Loop *L, const LoopVectorizationLegality::InductionList &IL,
+    SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
+  auto *Cmp = L->getLatchCmpInst();
+  if (Cmp)
+    InstsToIgnore.insert(Cmp);
+  for (const auto &KV : IL) {
+    // Extract the key by hand so that it can be used in the lambda below. Note
+    // that captured structured bindings are a C++20 extension.
+    const PHINode *IV = KV.first;
+
+    // Get next iteration value of the induction variable.
+    Instruction *IVInst =
+        cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
+    // Any other user still needs the value, so it must keep its cost.
+    if (all_of(IVInst->users(),
+               [&](const User *U) { return U == IV || U == Cmp; }))
+      InstsToIgnore.insert(IVInst);
+  }
+}
+
 void InnerLoopVectorizer::createInductionResumeVPValues(
     const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
     SmallPtrSetImpl<PHINode *> *IVSubset) {
@@ -5592,6 +5619,15 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
 InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   InstructionCost Cost;
 
+  // If the vector loop gets executed exactly once with the given VF, ignore the
+  // costs of comparison and induction instructions, as they'll get simplified
+  // away.
+  SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
+  auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+  if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
+    addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
+                                         ValuesToIgnoreForVF);
+
   // For each block.
for (BasicBlock *BB : TheLoop->blocks()) { InstructionCost BlockCost; @@ -5599,7 +5627,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { // For each instruction in the old loop. for (Instruction &I : BB->instructionsWithoutDebug()) { // Skip ignored values. - if (ValuesToIgnore.count(&I) || + if (ValuesToIgnore.count(&I) || ValuesToIgnoreForVF.count(&I) || (VF.isVector() && VecValuesToIgnore.count(&I))) continue; @@ -7281,6 +7309,17 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, continue; IVInsts.push_back(CI); } + + // If the vector loop gets executed exactly once with the given VF, ignore + // the costs of comparison and induction instructions, as they'll get + // simplified away. + // TODO: Remove this code after stepping away from the legacy cost model and + // adding code to simplify VPlans before calculating their costs. + auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop); + if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking()) + addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(), + CostCtx.SkipCostComputation); + for (Instruction *IVInst : IVInsts) { if (CostCtx.skipCostComputation(IVInst, VF.isVector())) continue; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll index aea5467c0edb12..f5ffc731eac842 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll @@ -12,12 +12,10 @@ define i64 @test(ptr %a, ptr %b) #0 { ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 8: 26 -; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 16: 
induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] -; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost for VF 16: 50 -; CHECK: LV: Selecting VF: vscale x 2 +; CHECK: Cost for VF 16: 48 +; CHECK: LV: Selecting VF: 16 entry: br label %for.body @@ -50,9 +48,8 @@ define i64 @test_external_iv_user(ptr %a, ptr %b) #0 { ; CHECK: Cost for VF 8: 26 ; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] -; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost for VF 16: 50 +; CHECK: Cost for VF 16: 49 ; CHECK: LV: Selecting VF: vscale x 2 entry: br label %for.body @@ -86,13 +83,10 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 { ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 8: 27 -; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] -; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] -; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost 
for VF 16: 51 +; CHECK: Cost for VF 16: 48 ; CHECK: LV: Selecting VF: 16 entry: br label %for.body @@ -125,11 +119,9 @@ define i1 @test_extra_cmp_user(ptr nocapture noundef %dst, ptr nocapture noundef ; CHECK-NEXT: Cost of 4 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %indvars.iv.next, 16 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 8: 12 -; CHECK-NEXT: Cost of 8 for VF 16: induction instruction %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -; CHECK-NEXT: Cost of 8 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %indvars.iv.next, 16 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost for VF 16: 20 +; CHECK: Cost for VF 16: 4 ; CHECK: LV: Selecting VF: 16 entry: br label %for.body