From 245b56a80bca6369a9be3102308617f2a4a4d51b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 1 Oct 2024 20:57:24 +0100 Subject: [PATCH 01/19] [VPlan] Support VPIRBBs and VPIRInst phis with multiple predecessors. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 5 ++++- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 9 +++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index c1b97791331bcf..8609514c39e7d0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1056,7 +1056,10 @@ void VPlan::execute(VPTransformState *State) { State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}}); // Generate code in the loop pre-header and body. - for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) + ReversePostOrderTraversal> RPOT( + Entry); + + for (VPBlockBase *Block : RPOT) Block->execute(State); VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 41f13cc2d9a978..be3e958320e771 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -857,12 +857,13 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, void VPIRInstruction::execute(VPTransformState &State) { assert((isa(&I) || getNumOperands() == 0) && "Only PHINodes can have extra operands"); - if (getNumOperands() == 1) { - VPValue *ExitValue = getOperand(0); + for (const auto &[Idx, Op] : enumerate(operands())) { + VPValue *ExitValue = Op; auto Lane = vputils::isUniformAfterVectorization(ExitValue) ? 
VPLane::getFirstLane() : VPLane::getLastLaneForVF(State.VF); - auto *PredVPBB = cast(getParent()->getSinglePredecessor()); + VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; + auto *PredVPBB = Pred->getExitingBasicBlock(); BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. @@ -890,7 +891,7 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, O << Indent << "IR " << I; if (getNumOperands() != 0) { - assert(getNumOperands() == 1 && "can have at most 1 operand"); + // assert(getNumOperands() == 1 && "can have at most 1 operand"); O << " (extra operand: "; printOperands(O, SlotTracker); O << ")"; From 47258deea863675e43fd7fd48376dce131441dc5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 18 Sep 2024 21:35:57 +0100 Subject: [PATCH 02/19] [VPlan] Dispatch to multiple exit blocks via middle blocks. A more lightweight variant of https://github.com/llvm/llvm-project/pull/109193, which dispatches to multiple exit blocks via the middle blocks. 
--- .../Vectorize/LoopVectorizationLegality.h | 3 + .../Vectorize/LoopVectorizationLegality.cpp | 29 +++ .../Transforms/Vectorize/LoopVectorize.cpp | 82 +++--- llvm/lib/Transforms/Vectorize/VPlan.cpp | 39 ++- llvm/lib/Transforms/Vectorize/VPlan.h | 1 + .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 16 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 82 ++++++ .../Transforms/Vectorize/VPlanTransforms.h | 4 + .../Transforms/Vectorize/VPlanVerifier.cpp | 8 - .../LoopVectorize/X86/multi-exit-codegen.ll | 240 ++++++++++++++++++ .../LoopVectorize/X86/multi-exit-cost.ll | 18 +- .../LoopVectorize/X86/multi-exit-vplan.ll | 148 +++++++++++ 12 files changed, 614 insertions(+), 56 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index dc7e484a40a452..af6fae44cf0f09 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -287,6 +287,9 @@ class LoopVectorizationLegality { /// we can use in-order reductions. bool canVectorizeFPMath(bool EnableStrictReductions); + /// Returns true if the loop has an early exit that we can vectorize. + bool canVectorizeEarlyExit() const; + /// Return true if we can vectorize this loop while folding its tail by /// masking. 
bool canFoldTailByMasking() const; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 43be72f0f34d45..ee53d28a4c8282 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -43,6 +43,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables.")); +static cl::opt + EnableEarlyExitVectorization("enable-early-exit-vectorization", + cl::init(false), cl::Hidden, cl::desc("")); + namespace llvm { cl::opt HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, @@ -1378,6 +1382,10 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence( } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { + // When vectorizing early exits, create predicates for all blocks, except the + // header. + if (canVectorizeEarlyExit() && BB != TheLoop->getHeader()) + return true; return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } @@ -1514,6 +1522,27 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { return true; } +bool LoopVectorizationLegality::canVectorizeEarlyExit() const { + // Currently only allow vectorizing loops with early exits, if early-exit + // vectorization is explicitly enabled and the loop has metadata to force + // vectorization. + if (!EnableEarlyExitVectorization) + return false; + + SmallVector Exiting; + TheLoop->getExitingBlocks(Exiting); + if (Exiting.size() == 1) + return false; + + LoopVectorizeHints Hints(TheLoop, true, *ORE); + if (Hints.getForce() == LoopVectorizeHints::FK_Undefined) + return false; + + Function *Fn = TheLoop->getHeader()->getParent(); + return Hints.allowVectorization(Fn, TheLoop, + true /*VectorizeOnlyWhenForced*/); +} + // Helper function to canVectorizeLoopNestCFG. 
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e8653498d32a12..befe8f7c0076a3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1363,9 +1363,11 @@ class LoopVectorizationCostModel { // If we might exit from anywhere but the latch, must run the exiting // iteration in scalar form. if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { - LLVM_DEBUG( - dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n"); - return true; + if (!Legal->canVectorizeEarlyExit()) { + LLVM_DEBUG( + dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n"); + return true; + } } if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) { LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: " @@ -2575,7 +2577,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LoopVectorPreHeader = OrigLoop->getLoopPreheader(); assert(LoopVectorPreHeader && "Invalid loop structure"); LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr - assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) && + assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector()) || + Legal->canVectorizeEarlyExit()) && "multiple exit loop without required epilogue?"); LoopMiddleBlock = @@ -2758,8 +2761,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, // value (the value that feeds into the phi from the loop latch). // We allow both, but they, obviously, have different values. 
- assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block"); - DenseMap MissingVals; // An external user of the last iteration's value should see the value that @@ -2819,6 +2820,9 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, if (PHI->getBasicBlockIndex(MiddleBlock) == -1) PHI->addIncoming(I.second, MiddleBlock); } + + assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) && + "Expected a single exit block"); } namespace { @@ -3599,7 +3603,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { TheLoop->getExitingBlocks(Exiting); for (BasicBlock *E : Exiting) { auto *Cmp = dyn_cast(E->getTerminator()->getOperand(0)); - if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) + if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() && + (TheLoop->getLoopLatch() == E || !Legal->canVectorizeEarlyExit())) AddToWorklistIfAllowed(Cmp); } @@ -7692,12 +7697,15 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan.execute(&State); // 2.5 Collect reduction resume values. - auto *ExitVPBB = - cast(BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); - for (VPRecipeBase &R : *ExitVPBB) { - createAndCollectMergePhiForReduction( - dyn_cast(&R), State, OrigLoop, - State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs); + VPBasicBlock *ExitVPBB = nullptr; + if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) { + ExitVPBB = cast( + BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); + for (VPRecipeBase &R : *ExitVPBB) { + createAndCollectMergePhiForReduction( + dyn_cast(&R), State, OrigLoop, + State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs); + } } // 2.6. 
Maintain Loop Hints @@ -7723,6 +7731,7 @@ DenseMap LoopVectorizationPlanner::executePlan( LoopVectorizeHints Hints(L, true, *ORE); Hints.setAlreadyVectorized(); } + TargetTransformInfo::UnrollingPreferences UP; TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE); if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue) @@ -7735,15 +7744,17 @@ DenseMap LoopVectorizationPlanner::executePlan( ILV.printDebugTracesAtEnd(); // 4. Adjust branch weight of the branch in the middle block. - auto *MiddleTerm = - cast(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); - if (MiddleTerm->isConditional() && - hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { - // Assume that `Count % VectorTripCount` is equally distributed. - unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); - assert(TripCount > 0 && "trip count should not be zero"); - const uint32_t Weights[] = {1, TripCount - 1}; - setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); + if (ExitVPBB) { + auto *MiddleTerm = + cast(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); + if (MiddleTerm->isConditional() && + hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { + // Assume that `Count % VectorTripCount` is equally distributed. + unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); + assert(TripCount > 0 && "trip count should not be zero"); + const uint32_t Weights[] = {1, TripCount - 1}; + setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); + } } return State.ExpandedSCEVs; @@ -8128,7 +8139,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // If source is an exiting block, we know the exit edge is dynamically dead // in the vector loop, and thus we don't need to restrict the mask. Avoid // adding uses of an otherwise potentially dead instruction. 
- if (OrigLoop->isLoopExiting(Src)) + if (!Legal->canVectorizeEarlyExit() && OrigLoop->isLoopExiting(Src)) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition()); @@ -8778,6 +8789,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, static SetVector collectUsersInExitBlock( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { + if (!Plan.getVectorLoopRegion()->getSingleSuccessor()) + return {}; auto *MiddleVPBB = cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); // No edge from the middle block to the unique exit block has been inserted @@ -8863,6 +8876,8 @@ static void addLiveOutsForFirstOrderRecurrences( // TODO: Should be replaced by // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the // scalar region is modeled as well. + if (!VectorRegion->getSingleSuccessor()) + return; auto *MiddleVPBB = cast(VectorRegion->getSingleSuccessor()); VPBasicBlock *ScalarPHVPBB = nullptr; if (MiddleVPBB->getNumSuccessors() == 2) { @@ -9146,10 +9161,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); - SetVector ExitUsersToFix = collectUsersInExitBlock( - OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); - addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix); - addUsersInExitBlock(*Plan, ExitUsersToFix); + if (Legal->canVectorizeEarlyExit()) { + VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop, + RecipeBuilder); + } else { + SetVector ExitUsersToFix = collectUsersInExitBlock( + OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); + addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix); + addUsersInExitBlock(*Plan, ExitUsersToFix); + } // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to @@ -9277,8 +9297,6 @@ void 
LoopVectorizationPlanner::adjustRecipesForReductions( using namespace VPlanPatternMatch; VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion(); VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock(); - VPBasicBlock *MiddleVPBB = - cast(VectorLoopRegion->getSingleSuccessor()); for (VPRecipeBase &R : Header->phis()) { auto *PhiR = dyn_cast(&R); if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered())) @@ -9297,8 +9315,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( for (VPUser *U : Cur->users()) { auto *UserRecipe = cast(U); if (!UserRecipe->getParent()->getEnclosingLoopRegion()) { - assert(UserRecipe->getParent() == MiddleVPBB && - "U must be either in the loop region or the middle block."); continue; } Worklist.insert(UserRecipe); @@ -9403,6 +9419,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( } VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock(); Builder.setInsertPoint(&*LatchVPBB->begin()); + if (!VectorLoopRegion->getSingleSuccessor()) + return; + VPBasicBlock *MiddleVPBB = + cast(VectorLoopRegion->getSingleSuccessor()); VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi(); for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 8609514c39e7d0..eb7c808551340d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -474,6 +474,14 @@ void VPIRBasicBlock::execute(VPTransformState *State) { // backedges. A backward successor is set when the branch is created. const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; + if (TermBr->getSuccessor(idx) && + PredVPBlock == getPlan()->getVectorLoopRegion() && + PredVPBlock->getNumSuccessors()) { + // Update PRedBB and TermBr for BranchOnMultiCond in predecessor. 
+ PredBB = TermBr->getSuccessor(1); + TermBr = cast(PredBB->getTerminator()); + idx = 0; + } assert(!TermBr->getSuccessor(idx) && "Trying to reset an existing successor block."); TermBr->setSuccessor(idx, IRBB); @@ -908,8 +916,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); - VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); if (!RequiresScalarEpilogueCheck) { + VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); return Plan; } @@ -923,10 +931,14 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // we unconditionally branch to the scalar preheader. Do nothing. // 3) Otherwise, construct a runtime check. BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); - auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); - // The connection order corresponds to the operands of the conditional branch. - VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); - VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + if (IRExitBlock) { + auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); + // The connection order corresponds to the operands of the conditional + // branch. + VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); + VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); + VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + } auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); // Here we use the same DebugLoc as the scalar loop latch terminator instead @@ -1031,7 +1043,9 @@ void VPlan::execute(VPTransformState *State) { // VPlan execution rather than earlier during VPlan construction. BasicBlock *MiddleBB = State->CFG.ExitBB; VPBasicBlock *MiddleVPBB = - cast(getVectorLoopRegion()->getSingleSuccessor()); + getVectorLoopRegion()->getNumSuccessors() == 1 + ? 
cast(getVectorLoopRegion()->getSuccessors()[0]) + : cast(getVectorLoopRegion()->getSuccessors()[1]); // Find the VPBB for the scalar preheader, relying on the current structure // when creating the middle block and its successrs: if there's a single // predecessor, it must be the scalar preheader. Otherwise, the second @@ -1044,6 +1058,10 @@ void VPlan::execute(VPTransformState *State) { MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]); assert(!isa(ScalarPhVPBB) && "scalar preheader cannot be wrapped already"); + if (ScalarPhVPBB->getNumSuccessors() != 0) { + ScalarPhVPBB = cast(ScalarPhVPBB->getSuccessors()[1]); + MiddleVPBB = cast(MiddleVPBB->getSuccessors()[1]); + } replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh); replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); @@ -1065,6 +1083,10 @@ void VPlan::execute(VPTransformState *State) { VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; + if (!getVectorLoopRegion()->getSingleSuccessor()) + VectorLatchBB = + cast(VectorLatchBB->getTerminator())->getSuccessor(1); + // Fix the latch value of canonical, reduction and first-order recurrences // phis in the vector loop. VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); @@ -1091,7 +1113,10 @@ void VPlan::execute(VPTransformState *State) { // Move the last step to the end of the latch block. This ensures // consistent placement of all induction updates. Instruction *Inc = cast(Phi->getIncomingValue(1)); - Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); + if (VectorLatchBB->getTerminator() == &*VectorLatchBB->getFirstNonPHI()) + Inc->moveBefore(VectorLatchBB->getTerminator()); + else + Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); // Use the steps for the last part as backedge value for the induction. 
if (auto *IV = dyn_cast(&R)) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 59a084401cc9bf..21f44eac188936 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1274,6 +1274,7 @@ class VPInstruction : public VPRecipeWithIRFlags, // operand). Only generates scalar values (either for the first lane only or // for all lanes, depending on its uses). PtrAdd, + AnyOf, }; private: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index be3e958320e771..9d5c609ad26043 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -67,6 +67,8 @@ bool VPRecipeBase::mayWriteToMemory() const { default: return true; } + case VPExpandSCEVSC: + return getParent()->getPlan()->getTripCount() == getVPSingleValue(); case VPInterleaveSC: return cast(this)->getNumStoreOperands() > 0; case VPWidenStoreEVLSC: @@ -160,6 +162,8 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPPredInstPHISC: case VPScalarCastSC: return false; + case VPExpandSCEVSC: + return getParent()->getPlan()->getTripCount() == getVPSingleValue(); case VPInstructionSC: return mayWriteToMemory(); case VPWidenCallSC: { @@ -399,6 +403,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: + case VPInstruction::AnyOf: return true; default: return false; @@ -674,6 +679,10 @@ Value *VPInstruction::generate(VPTransformState &State) { } return NewPhi; } + case VPInstruction::AnyOf: { + Value *A = State.get(getOperand(0)); + return Builder.CreateOrReduce(A); + } default: llvm_unreachable("Unsupported opcode for instruction"); @@ -682,7 +691,8 @@ Value *VPInstruction::generate(VPTransformState &State) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == 
VPInstruction::ExtractFromEnd || - getOpcode() == VPInstruction::ComputeReductionResult; + getOpcode() == VPInstruction::ComputeReductionResult || + getOpcode() == VPInstruction::AnyOf; } bool VPInstruction::isSingleScalar() const { @@ -745,6 +755,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { return false; case Instruction::ICmp: case Instruction::Select: + case Instruction::Or: case VPInstruction::PtrAdd: // TODO: Cover additional opcodes. return vputils::onlyFirstLaneUsed(this); @@ -840,6 +851,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::PtrAdd: O << "ptradd"; break; + case VPInstruction::AnyOf: + O << "any-of"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d50f3c0c3f3e04..a86498eb9aa30c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -515,6 +515,12 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { ReversePostOrderTraversal> RPOT( Plan.getEntry()); + for (VPRecipeBase &R : make_early_inc_range( + reverse(*cast(Plan.getPreheader())))) { + if (isDeadRecipe(R)) + R.eraseFromParent(); + } + for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly(RPOT))) { // The recipes in the block are processed in reverse order, to catch chains // of dead recipes. 
@@ -1696,3 +1702,79 @@ void VPlanTransforms::createInterleaveGroups( } } } + +void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, + Loop *OrigLoop, + VPRecipeBuilder &RecipeBuilder) { + auto *LatchVPBB = + cast(Plan.getVectorLoopRegion()->getExiting()); + VPBuilder Builder(LatchVPBB->getTerminator()); + auto *MiddleVPBB = + cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); + + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + + const SCEV *BackedgeTakenCount = + SE.getExitCount(OrigLoop, OrigLoop->getLoopLatch()); + const SCEV *TripCount = SE.getTripCountFromExitCount( + BackedgeTakenCount, Plan.getCanonicalIV()->getScalarType(), OrigLoop); + VPValue *NewTC = vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE); + Plan.getTripCount()->replaceAllUsesWith(NewTC); + Plan.resetTripCount(NewTC); + + VPValue *EarlyExitTaken = nullptr; + SmallVector ExitingBBs; + OrigLoop->getExitingBlocks(ExitingBBs); + for (BasicBlock *Exiting : ExitingBBs) { + auto *ExitingTerm = cast(Exiting->getTerminator()); + BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); + BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); + VPIRBasicBlock *VPExitBlock; + if (OrigLoop->getUniqueExitBlock()) + VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); + else + VPExitBlock = VPIRBasicBlock::fromBasicBlock( + !OrigLoop->contains(TrueSucc) ? 
TrueSucc : FalseSucc); + + for (VPRecipeBase &R : *VPExitBlock) { + auto *ExitIRI = cast(&R); + auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); + if (!ExitPhi) + break; + Value *IncomingValue = ExitPhi->getIncomingValueForBlock(Exiting); + VPValue *V = RecipeBuilder.getVPValueOrAddLiveIn(IncomingValue); + ExitIRI->addOperand(V); + } + + if (Exiting == OrigLoop->getLoopLatch()) { + if (MiddleVPBB->getNumSuccessors() == 0) { + VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); + VPBlockUtils::connectBlocks(MiddleVPBB, VPExitBlock); + VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + } + continue; + } + + VPValue *M = RecipeBuilder.getBlockInMask( + OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); + auto *N = Builder.createNot(M); + EarlyExitTaken = Builder.createNaryOp(VPInstruction::AnyOf, {N}); + + VPBasicBlock *NewMiddle = new VPBasicBlock("middle.split"); + VPBlockUtils::disconnectBlocks(LoopRegion, MiddleVPBB); + VPBlockUtils::insertBlockAfter(NewMiddle, LoopRegion); + VPBlockUtils::connectBlocks(NewMiddle, VPExitBlock); + VPBlockUtils::connectBlocks(NewMiddle, MiddleVPBB); + + VPBuilder MiddleBuilder(NewMiddle); + MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {EarlyExitTaken}); + // MiddleVPBB = NewMiddle; + } + auto *Term = dyn_cast(LatchVPBB->getTerminator()); + auto *IsLatchExiting = Builder.createICmp( + CmpInst::ICMP_EQ, Term->getOperand(0), Term->getOperand(1)); + auto *AnyExiting = + Builder.createNaryOp(Instruction::Or, {EarlyExitTaken, IsLatchExiting}); + Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExiting); + Term->eraseFromParent(); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 60a44bfb0dca6b..9745211db275f0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -123,6 +123,10 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. 
static void removeDeadRecipes(VPlan &Plan); + + static void convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, + Loop *OrigLoop, + VPRecipeBuilder &RecipeBuilder); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 7ea5ee341cc547..1ac79f8887ab46 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -248,14 +248,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { return false; } - VPBlockBase *MiddleBB = - IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor(); - if (IRBB != IRBB->getPlan()->getPreheader() && - IRBB->getSinglePredecessor() != MiddleBB) { - errs() << "VPIRBasicBlock can only be used as pre-header or a successor of " - "middle-block at the moment!\n"; - return false; - } return true; } diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll new file mode 100644 index 00000000000000..0c33715c6bd271 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll @@ -0,0 +1,240 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization %s | FileCheck --check-prefix=MULTI %s +; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization=false %s | FileCheck --check-prefix=DEFAULT %s + +define i64 @multi_exit_with_store(ptr %p, i64 %N) { +; MULTI-LABEL: define i64 @multi_exit_with_store( +; MULTI-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; MULTI-NEXT: [[ENTRY:.*]]: +; MULTI-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; MULTI: [[VECTOR_PH]]: +; MULTI-NEXT: [[BROADCAST_SPLATINSERT:%.*]] 
= insertelement <4 x i64> poison, i64 [[N]], i64 0 +; MULTI-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; MULTI-NEXT: br label %[[VECTOR_BODY:.*]] +; MULTI: [[VECTOR_BODY]]: +; MULTI-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MULTI-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MULTI-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; MULTI-NEXT: [[TMP1:%.*]] = icmp uge <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; MULTI-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; MULTI-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP0]] +; MULTI-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0 +; MULTI-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP4]], i32 4, <4 x i1> [[TMP2]]) +; MULTI-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; MULTI-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP2]], +; MULTI-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; MULTI-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; MULTI-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; MULTI-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; MULTI-NEXT: br i1 [[TMP8]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; MULTI: [[MIDDLE_SPLIT]]: +; MULTI-NEXT: br i1 [[TMP6]], label %[[E1:.*]], label %[[MIDDLE_BLOCK:.*]] +; MULTI: [[MIDDLE_BLOCK]]: +; MULTI-NEXT: br i1 true, label %[[E2:.*]], label %[[SCALAR_PH]] +; MULTI: [[SCALAR_PH]]: +; MULTI-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; MULTI-NEXT: br label %[[LOOP_HEADER:.*]] +; MULTI: [[LOOP_HEADER]]: +; MULTI-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MULTI-NEXT: [[CMP1:%.*]] = icmp uge i64 [[I_07]], [[N]] +; MULTI-NEXT: br i1 [[CMP1]], 
label %[[E1]], label %[[LOOP_LATCH]] +; MULTI: [[LOOP_LATCH]]: +; MULTI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[I_07]] +; MULTI-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +; MULTI-NEXT: [[INC]] = add nuw i64 [[I_07]], 1 +; MULTI-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], 128 +; MULTI-NEXT: br i1 [[CMP_NOT]], label %[[E2]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; MULTI: [[E1]]: +; MULTI-NEXT: ret i64 0 +; MULTI: [[E2]]: +; MULTI-NEXT: ret i64 1 +; +; DEFAULT-LABEL: define i64 @multi_exit_with_store( +; DEFAULT-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 127) +; DEFAULT-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[UMIN]], 1 +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP4]], 4 +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; DEFAULT: [[VECTOR_PH]]: +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4 +; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; DEFAULT-NEXT: [[TMP2:%.*]] = select i1 [[TMP5]], i64 4, i64 [[N_MOD_VF]] +; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP2]] +; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] +; DEFAULT: [[VECTOR_BODY]]: +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[TMP0]] +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; DEFAULT-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 4 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DEFAULT: [[MIDDLE_BLOCK]]: +; DEFAULT-NEXT: br 
label %[[SCALAR_PH]] +; DEFAULT: [[SCALAR_PH]]: +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: br label %[[LOOP_HEADER:.*]] +; DEFAULT: [[LOOP_HEADER]]: +; DEFAULT-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; DEFAULT-NEXT: [[CMP1:%.*]] = icmp uge i64 [[I_07]], [[N]] +; DEFAULT-NEXT: br i1 [[CMP1]], label %[[E1:.*]], label %[[LOOP_LATCH]] +; DEFAULT: [[LOOP_LATCH]]: +; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[I_07]] +; DEFAULT-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +; DEFAULT-NEXT: [[INC]] = add nuw i64 [[I_07]], 1 +; DEFAULT-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], 128 +; DEFAULT-NEXT: br i1 [[CMP_NOT]], label %[[E2:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; DEFAULT: [[E1]]: +; DEFAULT-NEXT: ret i64 0 +; DEFAULT: [[E2]]: +; DEFAULT-NEXT: ret i64 1 +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %c.1 = icmp uge i64 %iv, %N + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv + store i32 0, ptr %arrayidx + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header, !llvm.loop !1 + +e1: + ret i64 0 + +e2: + ret i64 1 +} + +define i64 @multi_exiting_to_same_exit_with_store(ptr %p, i64 %N) { +; MULTI-LABEL: define i64 @multi_exiting_to_same_exit_with_store( +; MULTI-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; MULTI-NEXT: [[ENTRY:.*]]: +; MULTI-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; MULTI: [[VECTOR_PH]]: +; MULTI-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 +; MULTI-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; MULTI-NEXT: br label 
%[[VECTOR_BODY:.*]] +; MULTI: [[VECTOR_BODY]]: +; MULTI-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MULTI-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MULTI-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; MULTI-NEXT: [[TMP1:%.*]] = icmp uge <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; MULTI-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; MULTI-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP0]] +; MULTI-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0 +; MULTI-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP4]], i32 4, <4 x i1> [[TMP2]]) +; MULTI-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; MULTI-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP2]], +; MULTI-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; MULTI-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; MULTI-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; MULTI-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; MULTI-NEXT: br i1 [[TMP8]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; MULTI: [[MIDDLE_SPLIT]]: +; MULTI-NEXT: br i1 [[TMP6]], label %[[E:.*]], label %[[MIDDLE_BLOCK:.*]] +; MULTI: [[MIDDLE_BLOCK]]: +; MULTI-NEXT: br i1 true, label %[[E]], label %[[SCALAR_PH]] +; MULTI: [[SCALAR_PH]]: +; MULTI-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; MULTI-NEXT: br label %[[LOOP_HEADER:.*]] +; MULTI: [[LOOP_HEADER]]: +; MULTI-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MULTI-NEXT: [[C_1:%.*]] = icmp uge i64 [[IV]], [[N]] +; MULTI-NEXT: br i1 [[C_1]], label %[[E]], label %[[LOOP_LATCH]] +; MULTI: [[LOOP_LATCH]]: +; MULTI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]] +; MULTI-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +; MULTI-NEXT: 
[[INC]] = add nuw i64 [[IV]], 1 +; MULTI-NEXT: [[C_2:%.*]] = icmp eq i64 [[INC]], 128 +; MULTI-NEXT: br i1 [[C_2]], label %[[E]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; MULTI: [[E]]: +; MULTI-NEXT: [[P1:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[MIDDLE_SPLIT]] ] +; MULTI-NEXT: ret i64 [[P1]] +; +; DEFAULT-LABEL: define i64 @multi_exiting_to_same_exit_with_store( +; DEFAULT-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 127) +; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[UMIN]], 1 +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP0]], 4 +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; DEFAULT: [[VECTOR_PH]]: +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; DEFAULT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 4, i64 [[N_MOD_VF]] +; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP2]] +; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] +; DEFAULT: [[VECTOR_BODY]]: +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[TMP3]] +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; DEFAULT-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT: [[MIDDLE_BLOCK]]: +; DEFAULT-NEXT: br label %[[SCALAR_PH]] +; DEFAULT: [[SCALAR_PH]]: +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], 
[ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: br label %[[LOOP_HEADER:.*]] +; DEFAULT: [[LOOP_HEADER]]: +; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; DEFAULT-NEXT: [[C_1:%.*]] = icmp uge i64 [[IV]], [[N]] +; DEFAULT-NEXT: br i1 [[C_1]], label %[[E:.*]], label %[[LOOP_LATCH]] +; DEFAULT: [[LOOP_LATCH]]: +; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]] +; DEFAULT-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +; DEFAULT-NEXT: [[INC]] = add nuw i64 [[IV]], 1 +; DEFAULT-NEXT: [[C_2:%.*]] = icmp eq i64 [[INC]], 128 +; DEFAULT-NEXT: br i1 [[C_2]], label %[[E]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT: [[E]]: +; DEFAULT-NEXT: [[P1:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ] +; DEFAULT-NEXT: ret i64 [[P1]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %c.1 = icmp uge i64 %iv, %N + br i1 %c.1, label %e, label %loop.latch + +loop.latch: + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv + store i32 0, ptr %arrayidx + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e, label %loop.header, !llvm.loop !1 + +e: + %p1 = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i64 %p1 +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} +;. +; MULTI: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; MULTI: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; MULTI: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; MULTI: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; MULTI: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; MULTI: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +;. 
+; DEFAULT: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; DEFAULT: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; DEFAULT: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; DEFAULT: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; DEFAULT: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index cd128979fc1431..1c02f10753745c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -5,18 +5,18 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-LABEL: define i64 @test_value_in_exit_compare_chain_used_outside( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 range(i64 1, 32) [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] -; CHECK-NEXT: [[UMIN2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[X]]) -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[UMIN2]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[N]], -1 ; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]] ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[X]]) -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[UMIN]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; 
CHECK-NEXT: [[TMP32:%.*]] = add nsw i64 [[N]], -1 +; CHECK-NEXT: [[TMP33:%.*]] = freeze i64 [[TMP32]] +; CHECK-NEXT: [[UMIN1:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP33]], i64 [[X]]) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN1]] to i1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[UMIN1]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll new file mode 100644 index 00000000000000..5c5d532b93bc89 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s + +define i64 @multi_exiting_to_different_exits_with_store(ptr %p, i64 %N) { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> +; CHECK-NEXT: WIDEN store 
vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %c.1 = icmp uge i64 %iv, %N + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv + store i32 0, ptr %arrayidx + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header, !llvm.loop !1 + +e1: + %p1 = phi i64 [ 0, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + +define i64 @multi_exiting_to_same_exit_with_store(ptr %p, i64 %N) { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; 
CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> +; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operand: ir<0>, ir<1>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 
0, %entry ] + %c.1 = icmp uge i64 %iv, %N + br i1 %c.1, label %e, label %loop.latch + +loop.latch: + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv + store i32 0, ptr %arrayidx + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e, label %loop.header, !llvm.loop !1 + +e: + %p1 = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i64 %p1 +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} From 3831acb97053230cb09f8316ce1ada17be50564c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 31 Oct 2024 19:48:40 +0000 Subject: [PATCH 03/19] !fixup address first set of comments, thanks! --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 13 ++----------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 16 +--------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3a69750460f4d8..80a0fda81aeaf1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7711,16 +7711,13 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan.execute(&State); // 2.5 Collect reduction resume values. - VPBasicBlock *ExitVPBB = nullptr; - if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) { - ExitVPBB = cast( - BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); + VPBasicBlock *ExitVPBB = + cast(BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); if (VectorizingEpilogue) for (VPRecipeBase &R : *ExitVPBB) { fixReductionScalarResumeWhenVectorizingEpilog( &R, State, State.CFG.VPBB2IRBB[ExitVPBB]); } - } // 2.6. 
Maintain Loop Hints // Keep all loop hints from the original loop on the vector loop (we'll @@ -8809,8 +8806,6 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, static SetVector collectUsersInExitBlock( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { - if (!Plan.getVectorLoopRegion()->getSingleSuccessor()) - return {}; auto *MiddleVPBB = cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); // No edge from the middle block to the unique exit block has been inserted @@ -8896,8 +8891,6 @@ static void addLiveOutsForFirstOrderRecurrences( // TODO: Should be replaced by // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the // scalar region is modeled as well. - if (!VectorRegion->getSingleSuccessor()) - return; auto *MiddleVPBB = cast(VectorRegion->getSingleSuccessor()); VPBasicBlock *ScalarPHVPBB = nullptr; if (MiddleVPBB->getNumSuccessors() == 2) { @@ -9447,8 +9440,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( } VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock(); Builder.setInsertPoint(&*LatchVPBB->begin()); - if (!VectorLoopRegion->getSingleSuccessor()) - return; VPBasicBlock *MiddleVPBB = cast(VectorLoopRegion->getSingleSuccessor()); VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index be3579b4cd5dde..4032468b4f76da 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -474,14 +474,6 @@ void VPIRBasicBlock::execute(VPTransformState *State) { // backedges. A backward successor is set when the branch is created. const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); unsigned idx = PredVPSuccessors.front() == this ? 
0 : 1; - if (TermBr->getSuccessor(idx) && - PredVPBlock == getPlan()->getVectorLoopRegion() && - PredVPBlock->getNumSuccessors()) { - // Update PRedBB and TermBr for BranchOnMultiCond in predecessor. - PredBB = TermBr->getSuccessor(1); - TermBr = cast(PredBB->getTerminator()); - idx = 0; - } assert(!TermBr->getSuccessor(idx) && "Trying to reset an existing successor block."); TermBr->setSuccessor(idx, IRBB); @@ -1043,9 +1035,7 @@ void VPlan::execute(VPTransformState *State) { // VPlan execution rather than earlier during VPlan construction. BasicBlock *MiddleBB = State->CFG.ExitBB; VPBasicBlock *MiddleVPBB = - getVectorLoopRegion()->getNumSuccessors() == 1 - ? cast(getVectorLoopRegion()->getSuccessors()[0]) - : cast(getVectorLoopRegion()->getSuccessors()[1]); + cast(getVectorLoopRegion()->getSingleSuccessor()); // Find the VPBB for the scalar preheader, relying on the current structure // when creating the middle block and its successrs: if there's a single // predecessor, it must be the scalar preheader. Otherwise, the second @@ -1083,10 +1073,6 @@ void VPlan::execute(VPTransformState *State) { VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; - if (!getVectorLoopRegion()->getSingleSuccessor()) - VectorLatchBB = - cast(VectorLatchBB->getTerminator())->getSuccessor(1); - // Fix the latch value of canonical, reduction and first-order recurrences // phis in the vector loop. 
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); From 64db0eea4073e1cdc3d394155754ed0653ca0c3d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 4 Nov 2024 21:10:24 +0000 Subject: [PATCH 04/19] !fixup clean up merge failures --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 18 ++++++++++-------- llvm/lib/Transforms/Vectorize/VPlan.h | 4 ++-- .../Transforms/Vectorize/VPlanTransforms.cpp | 11 ++--------- .../LoopVectorize/X86/multi-exit-vplan.ll | 11 ++++++++++- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 924b63bbd9639f..e2c063928e9906 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -911,7 +911,6 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader); if (!RequiresScalarEpilogueCheck) { - VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); return Plan; } @@ -925,14 +924,17 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // we unconditionally branch to the scalar preheader. Do nothing. // 3) Otherwise, construct a runtime check. BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); - if (IRExitBlock) { - auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); - // The connection order corresponds to the operands of the conditional - // branch. - VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); - VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); - VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + if (!IRExitBlock) { + auto *Term = cast(TheLoop->getLoopLatch()->getTerminator()); + IRExitBlock = TheLoop->contains(Term->getSuccessor(0)) + ? 
Term->getSuccessor(1) + : Term->getSuccessor(0); } + auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); + // The connection order corresponds to the operands of the conditional + // branch. + VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); + VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); // Here we use the same DebugLoc as the scalar loop latch terminator instead diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 861fea7ff469ff..8efa648a7e1ea7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3822,10 +3822,10 @@ class VPlan { /// whether to execute the scalar tail loop or the exit block from the loop /// latch. const VPBasicBlock *getMiddleBlock() const { - return cast(getVectorLoopRegion()->getSingleSuccessor()); + return cast(getScalarPreheader()->getSinglePredecessor()); } VPBasicBlock *getMiddleBlock() { - return cast(getVectorLoopRegion()->getSingleSuccessor()); + return cast(getScalarPreheader()->getSinglePredecessor()); } /// The trip count of the original loop. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index eed3efd666e712..1369693b01971c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1797,8 +1797,7 @@ void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, auto *LatchVPBB = cast(Plan.getVectorLoopRegion()->getExiting()); VPBuilder Builder(LatchVPBB->getTerminator()); - auto *MiddleVPBB = - cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); + auto *MiddleVPBB = Plan.getMiddleBlock(); VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); @@ -1818,7 +1817,7 @@ void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); VPIRBasicBlock *VPExitBlock; - if (OrigLoop->getUniqueExitBlock()) + if (OrigLoop->getUniqueExitBlock() || Exiting == OrigLoop->getLoopLatch()) VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); else VPExitBlock = VPIRBasicBlock::fromBasicBlock( @@ -1835,11 +1834,6 @@ void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, } if (Exiting == OrigLoop->getLoopLatch()) { - if (MiddleVPBB->getNumSuccessors() == 0) { - VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); - VPBlockUtils::connectBlocks(MiddleVPBB, VPExitBlock); - VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); - } continue; } @@ -1856,7 +1850,6 @@ void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, VPBuilder MiddleBuilder(NewMiddle); MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {EarlyExitTaken}); - // MiddleVPBB = NewMiddle; } auto *Term = dyn_cast(LatchVPBB->getTerminator()); auto *IsLatchExiting = Builder.createICmp( diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll index 5c5d532b93bc89..47304c571bfcb1 100644 --- 
a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll @@ -49,9 +49,13 @@ define i64 @multi_exiting_to_different_exits_with_store(ptr %p, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; entry: br label %loop.header @@ -120,6 +124,11 @@ define i64 @multi_exiting_to_same_exit_with_store(ptr %p, i64 %N) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; From 0f8aedfaf89bc6dbe18bd00e6bad0aad52db10f3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 5 Nov 2024 13:50:43 +0000 Subject: [PATCH 05/19] !fixup address latest comments, thanks! 
--- .../Vectorize/LoopVectorizationLegality.cpp | 8 +- .../Transforms/Vectorize/LoopVectorize.cpp | 121 ++++++++++-------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 13 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 48 +++---- .../Transforms/Vectorize/VPlanTransforms.h | 6 +- .../LoopVectorize/X86/multi-exit-vplan.ll | 82 +----------- 6 files changed, 98 insertions(+), 180 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 0d8bda5a2112c7..ed3808d2f30bf1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -43,10 +43,6 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables.")); -static cl::opt - EnableEarlyExitVectorization("enable-early-exit-vectorization", - cl::init(false), cl::Hidden, cl::desc("")); - namespace llvm { cl::opt HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, @@ -1381,7 +1377,7 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence( bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { // When vectorizing early exits, create predicates for all blocks, except the // header. - if (canVectorizeEarlyExit() && BB != TheLoop->getHeader()) + if (hasUncountableEarlyExit() && BB != TheLoop->getHeader()) return true; return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } @@ -1523,8 +1519,6 @@ bool LoopVectorizationLegality::canVectorizeEarlyExit() const { // Currently only allow vectorizing loops with early exits, if early-exit // vectorization is explicitly enabled and the loop has metadata to force // vectorization. 
- if (!EnableEarlyExitVectorization) - return false; SmallVector Exiting; TheLoop->getExitingBlocks(Exiting); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 60607b7cf6b46c..47e78a916f8cea 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -384,6 +384,11 @@ static cl::opt UseWiderVFIfCallVariantsPresent( cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants")); +static cl::opt EnableEarlyExitVectorization( + "enable-early-exit-vectorization", cl::init(false), cl::Hidden, + cl::desc( + "Enable vectorization of early exit loops with uncountable exits.")); + // Likelyhood of bypassing the vectorized loop because assumptions about SCEV // variables not overflowing do not hold. See `emitSCEVChecks`. static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127}; @@ -1358,14 +1363,13 @@ class LoopVectorizationCostModel { LLVM_DEBUG(dbgs() << "LV: Loop does not require scalar epilogue\n"); return false; } - // If we might exit from anywhere but the latch, must run the exiting - // iteration in scalar form. - if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { - if (!Legal->canVectorizeEarlyExit()) { - LLVM_DEBUG( - dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n"); - return true; - } + // If we might exit from anywhere but the latch and early exit vectorization + // is disabled, we must run the exiting iteration in scalar form. 
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() && + !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit())) { + LLVM_DEBUG( + dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n"); + return true; } if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) { LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: " @@ -2576,7 +2580,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { assert(LoopVectorPreHeader && "Invalid loop structure"); LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector()) || - Legal->canVectorizeEarlyExit()) && + Legal->hasUncountableEarlyExit()) && "multiple exit loop without required epilogue?"); LoopMiddleBlock = @@ -2809,6 +2813,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, } } + assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) && + "Expected a single exit block for escaping values"); for (auto &I : MissingVals) { PHINode *PHI = cast(I.first); // One corner case we have to handle is two IVs "chasing" each-other, @@ -2819,9 +2825,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, if (PHI->getBasicBlockIndex(MiddleBlock) == -1) PHI->addIncoming(I.second, MiddleBlock); } - - assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) && - "Expected a single exit block"); } namespace { @@ -3597,7 +3600,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { for (BasicBlock *E : Exiting) { auto *Cmp = dyn_cast(E->getTerminator()->getOperand(0)); if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() && - (TheLoop->getLoopLatch() == E || !Legal->canVectorizeEarlyExit())) + (TheLoop->getLoopLatch() == E || !Legal->hasUncountableEarlyExit())) AddToWorklistIfAllowed(Cmp); } @@ -8144,7 +8147,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // If source is an exiting block, we know the exit edge is dynamically 
dead // in the vector loop, and thus we don't need to restrict the mask. Avoid // adding uses of an otherwise potentially dead instruction. - if (!Legal->canVectorizeEarlyExit() && OrigLoop->isLoopExiting(Src)) + if (!Legal->hasUncountableEarlyExit() && OrigLoop->isLoopExiting(Src)) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition()); @@ -8835,39 +8838,43 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { static SetVector collectUsersInExitBlock( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { - auto *MiddleVPBB = Plan.getMiddleBlock(); - // No edge from the middle block to the unique exit block has been inserted - // and there is nothing to fix from vector loop; phis should have incoming - // from scalar loop only. - if (MiddleVPBB->getNumSuccessors() != 2) - return {}; SetVector ExitUsersToFix; - VPBasicBlock *ExitVPBB = cast(MiddleVPBB->getSuccessors()[0]); - BasicBlock *ExitingBB = OrigLoop->getExitingBlock(); - for (VPRecipeBase &R : *ExitVPBB) { - auto *ExitIRI = dyn_cast(&R); - if (!ExitIRI) - continue; - auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); - if (!ExitPhi) - break; - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); - VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); - // Exit values for inductions are computed and updated outside of VPlan and - // independent of induction recipes. - // TODO: Compute induction exit values in VPlan. 
- if ((isa(V) && - !cast(V)->getTruncInst()) || - isa(V) || - (isa(IncomingValue) && - OrigLoop->contains(cast(IncomingValue)) && - any_of(IncomingValue->users(), [&Inductions](User *U) { - auto *P = dyn_cast(U); - return P && Inductions.contains(P); - }))) + for (VPBlockBase *VPB : vp_depth_first_shallow( + Plan.getVectorLoopRegion()->getSingleSuccessor())) { + if (VPB->getNumSuccessors() != 0 || VPB == Plan.getScalarHeader()) continue; - ExitUsersToFix.insert(ExitIRI); - ExitIRI->addOperand(V); + auto *ExitVPBB = cast(VPB); + BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock(); + BasicBlock *ExitingBB = find_singleton( + to_vector(predecessors(ExitBB)), + [OrigLoop](BasicBlock *Pred, bool AllowRepeats) { + return OrigLoop->contains(Pred) ? Pred : nullptr; + }); + for (VPRecipeBase &R : *ExitVPBB) { + auto *ExitIRI = dyn_cast(&R); + if (!ExitIRI) + continue; + auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); + if (!ExitPhi) + break; + Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); + VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); + // Exit values for inductions are computed and updated outside of VPlan + // and independent of induction recipes. + // TODO: Compute induction exit values in VPlan. 
+ if ((isa(V) && + !cast(V)->getTruncInst()) || + isa(V) || + (isa(IncomingValue) && + OrigLoop->contains(cast(IncomingValue)) && + any_of(IncomingValue->users(), [&Inductions](User *U) { + auto *P = dyn_cast(U); + return P && Inductions.contains(P); + }))) + continue; + ExitUsersToFix.insert(ExitIRI); + ExitIRI->addOperand(V); + } } return ExitUsersToFix; } @@ -9168,16 +9175,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); - if (Legal->canVectorizeEarlyExit()) { - VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop, - RecipeBuilder); - } else { + if (Legal->hasUncountableEarlyExit()) { + VPlanTransforms::handleUncountableEarlyExit(*Plan, *PSE.getSE(), OrigLoop, + RecipeBuilder); + } addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlock( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); addUsersInExitBlock(*Plan, ExitUsersToFix); - } // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to @@ -9940,12 +9946,15 @@ bool LoopVectorizePass::processLoop(Loop *L) { } if (LVL.hasUncountableEarlyExit()) { - reportVectorizationFailure("Auto-vectorization of loops with uncountable " - "early exit is not yet supported", - "Auto-vectorization of loops with uncountable " - "early exit is not yet supported", - "UncountableEarlyExitLoopsUnsupported", ORE, L); - return false; + if (!EnableEarlyExitVectorization) { + reportVectorizationFailure("Auto-vectorization of loops with uncountable " + "early exit is not yet supported", + "Auto-vectorization of loops with uncountable " + "early exit is not yet supported", + "UncountableEarlyExitLoopsUnsupported", ORE, + L); + return false; + } } // Entrance to the VPlan-native vectorization path. 
Outer loops are processed diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index ed2d6aa5f3df4b..1c609d89c47b9b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -878,15 +878,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, auto Plan = std::make_unique(Entry, VecPreheader, ScalarHeader); // Create SCEV and VPValue for the trip count. - - // Currently only loops with countable exits are vectorized, but calling - // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with - // uncountable exits whilst also ensuring the symbolic maximum and known - // back-edge taken count remain identical for loops with countable exits. + // We use the symbolic max backedge-taken-count, which is used when + // vectorizing loops with uncountable early exits const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); - assert((!isa(BackedgeTakenCountSCEV) && - BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) && - "Invalid loop count"); ScalarEvolution &SE = *PSE.getSE(); const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, InductionTy, TheLoop); @@ -922,6 +916,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // 3) Otherwise, construct a runtime check. BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); if (!IRExitBlock) { + // If there's no unique exit block (i.e. vectorizing with an uncountable + // early exit), use the block exiting from the latch. The other uncountable + // exit blocks will be added later. auto *Term = cast(TheLoop->getLoopLatch()->getTerminator()); IRExitBlock = TheLoop->contains(Term->getSuccessor(0)) ? 
Term->getSuccessor(1) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1369693b01971c..d336827b23ddf3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1791,51 +1791,33 @@ void VPlanTransforms::createInterleaveGroups( } } -void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, - Loop *OrigLoop, - VPRecipeBuilder &RecipeBuilder) { +void VPlanTransforms::handleUncountableEarlyExit( + VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, + VPRecipeBuilder &RecipeBuilder) { auto *LatchVPBB = cast(Plan.getVectorLoopRegion()->getExiting()); VPBuilder Builder(LatchVPBB->getTerminator()); auto *MiddleVPBB = Plan.getMiddleBlock(); - VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - const SCEV *BackedgeTakenCount = - SE.getExitCount(OrigLoop, OrigLoop->getLoopLatch()); - const SCEV *TripCount = SE.getTripCountFromExitCount( - BackedgeTakenCount, Plan.getCanonicalIV()->getScalarType(), OrigLoop); - VPValue *NewTC = vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE); - Plan.getTripCount()->replaceAllUsesWith(NewTC); - Plan.resetTripCount(NewTC); - VPValue *EarlyExitTaken = nullptr; SmallVector ExitingBBs; OrigLoop->getExitingBlocks(ExitingBBs); + + // Process all uncountable exiting blocks. For each exiting block, update the + // EarlyExitTaken, which tracks if any uncountable early exit has been taken. + // Also split the middle block and branch to the exit block for the early exit + // if it has been taken. 
for (BasicBlock *Exiting : ExitingBBs) { + if (Exiting == OrigLoop->getLoopLatch()) + continue; + auto *ExitingTerm = cast(Exiting->getTerminator()); BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); VPIRBasicBlock *VPExitBlock; - if (OrigLoop->getUniqueExitBlock() || Exiting == OrigLoop->getLoopLatch()) - VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); - else - VPExitBlock = VPIRBasicBlock::fromBasicBlock( - !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); - - for (VPRecipeBase &R : *VPExitBlock) { - auto *ExitIRI = cast(&R); - auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); - if (!ExitPhi) - break; - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(Exiting); - VPValue *V = RecipeBuilder.getVPValueOrAddLiveIn(IncomingValue); - ExitIRI->addOperand(V); - } - - if (Exiting == OrigLoop->getLoopLatch()) { - continue; - } + VPExitBlock = VPIRBasicBlock::fromBasicBlock( + !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); VPValue *M = RecipeBuilder.getBlockInMask( OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); @@ -1851,6 +1833,10 @@ void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, VPBuilder MiddleBuilder(NewMiddle); MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {EarlyExitTaken}); } + + // Replace the condition controlling the exit from the vector loop with one + // exiting if either the original condition of the vector latch is true or any + // early exit has been taken. 
auto *Term = dyn_cast(LatchVPBB->getTerminator()); auto *IsLatchExiting = Builder.createICmp( CmpInst::ICMP_EQ, Term->getOperand(0), Term->getOperand(1)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index b5b7144568ff54..f7bbae25279fce 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -124,9 +124,9 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. static void removeDeadRecipes(VPlan &Plan); - static void convertToMultiCond(VPlan &Plan, ScalarEvolution &SE, - Loop *OrigLoop, - VPRecipeBuilder &RecipeBuilder); + static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, + Loop *OrigLoop, + VPRecipeBuilder &RecipeBuilder); }; } // namespace llvm diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll index 47304c571bfcb1..5da97ef8f9b3bc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s +declare void @init(ptr) + define i64 @multi_exiting_to_different_exits_with_store(ptr %p, i64 %N) { ; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF @@ -57,16 +59,18 @@ define i64 @multi_exiting_to_different_exits_with_store(ptr %p, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: + %src = alloca [128 x i32] + call void @init(ptr %src) br label %loop.header loop.header: %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %c.1 = icmp uge i64 %iv, %N + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, 
ptr %gep.src + %c.1 = icmp eq i32 %l, 10 br i1 %c.1, label %e1, label %loop.latch loop.latch: - %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv - store i32 0, ptr %arrayidx %inc = add nuw i64 %iv, 1 %c.2 = icmp eq i64 %inc, 128 br i1 %c.2, label %e2, label %loop.header, !llvm.loop !1 @@ -80,78 +84,6 @@ e2: ret i64 %p2 } -define i64 @multi_exiting_to_same_exit_with_store(ptr %p, i64 %N) { -; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count -; CHECK-NEXT: Live-in ir<128> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> -; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> -; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> -; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.split -; CHECK-EMPTY: -; CHECK-NEXT: middle.split: -; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> -; CHECK-NEXT: Successor(s): ir-bb, middle.block -; 
CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operand: ir<0>, ir<1>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> -; CHECK-NEXT: Successor(s): ir-bb, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: ir-bb -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] -; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; -entry: - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %c.1 = icmp uge i64 %iv, %N - br i1 %c.1, label %e, label %loop.latch - -loop.latch: - %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv - store i32 0, ptr %arrayidx - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e, label %loop.header, !llvm.loop !1 - -e: - %p1 = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] - ret i64 %p1 -} - !1 = distinct !{!1, !2, !3} !2 = !{!"llvm.loop.vectorize.width", i32 4} !3 = !{!"llvm.loop.vectorize.enable", i1 true} From 9212f9647692387fb4a6ff8cc6a90b1fa2b73628 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 5 Nov 2024 15:07:50 +0000 Subject: [PATCH 06/19] [VPlan] Generalize collectUsersInExitBlocks for multiple exit bbs. Generalize collectUsersInExitBlock to collect exit users in multiple exit blocks. Exit blocks are the leaf nodes of the VPlan (blocks without successors), excluding the scalar header.
Split off from https://github.com/llvm/llvm-project/pull/112138 --- .../Transforms/Vectorize/LoopVectorize.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 47e78a916f8cea..4ccbc8e95d1edf 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8830,12 +8830,12 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { } } -// Collect VPIRInstructions for phis in the original exit block that are modeled +// Collect VPIRInstructions for phis in the exit blocks that are modeled // in VPlan and add the exiting VPValue as operand. Some exiting values are not // modeled explicitly yet and won't be included. Those are un-truncated // VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction // increments. -static SetVector collectUsersInExitBlock( +static SetVector collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { SetVector ExitUsersToFix; @@ -8882,8 +8882,8 @@ static SetVector collectUsersInExitBlock( // Add exit values to \p Plan. Extracts are added for each entry in \p // ExitUsersToFix if needed and their operands are updated. 
static void -addUsersInExitBlock(VPlan &Plan, - const SetVector &ExitUsersToFix) { +addUsersInExitBlocks(VPlan &Plan, + const SetVector &ExitUsersToFix) { if (ExitUsersToFix.empty()) return; @@ -9179,12 +9179,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanTransforms::handleUncountableEarlyExit(*Plan, *PSE.getSE(), OrigLoop, RecipeBuilder); } - addScalarResumePhis(RecipeBuilder, *Plan); - SetVector ExitUsersToFix = collectUsersInExitBlock( - OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); - addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - addUsersInExitBlock(*Plan, ExitUsersToFix); - + addScalarResumePhis(RecipeBuilder, *Plan); + SetVector ExitUsersToFix = collectUsersInExitBlocks( + OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); + addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); + addUsersInExitBlocks(*Plan, ExitUsersToFix); // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. 
From 5cb0851d69513eee7c14f21b598a014925ec6ae1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 5 Nov 2024 15:36:04 +0000 Subject: [PATCH 07/19] !fixup address comments --- .../Transforms/Vectorize/LoopVectorize.cpp | 62 ++--- .../Transforms/Vectorize/VPlanTransforms.cpp | 10 +- .../LoopVectorize/X86/multi-exit-vplan.ll | 89 ------- .../X86/uncountable-early-exit-vplan.ll | 244 ++++++++++++++++++ 4 files changed, 283 insertions(+), 122 deletions(-) delete mode 100644 llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4ccbc8e95d1edf..5188aeed2f32b5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8845,11 +8845,6 @@ static SetVector collectUsersInExitBlocks( continue; auto *ExitVPBB = cast(VPB); BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock(); - BasicBlock *ExitingBB = find_singleton( - to_vector(predecessors(ExitBB)), - [OrigLoop](BasicBlock *Pred, bool AllowRepeats) { - return OrigLoop->contains(Pred) ? Pred : nullptr; - }); for (VPRecipeBase &R : *ExitVPBB) { auto *ExitIRI = dyn_cast(&R); if (!ExitIRI) @@ -8857,23 +8852,27 @@ static SetVector collectUsersInExitBlocks( auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); if (!ExitPhi) break; - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); - VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); - // Exit values for inductions are computed and updated outside of VPlan - // and independent of induction recipes. - // TODO: Compute induction exit values in VPlan. 
- if ((isa(V) && - !cast(V)->getTruncInst()) || - isa(V) || - (isa(IncomingValue) && - OrigLoop->contains(cast(IncomingValue)) && - any_of(IncomingValue->users(), [&Inductions](User *U) { - auto *P = dyn_cast(U); - return P && Inductions.contains(P); - }))) - continue; - ExitUsersToFix.insert(ExitIRI); - ExitIRI->addOperand(V); + for (BasicBlock *ExitingBB : predecessors(ExitBB)) { + if (!OrigLoop->contains(ExitingBB)) + continue; + Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); + VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); + // Exit values for inductions are computed and updated outside of VPlan + // and independent of induction recipes. + // TODO: Compute induction exit values in VPlan. + if ((isa(V) && + !cast(V)->getTruncInst()) || + isa(V) || + (isa(IncomingValue) && + OrigLoop->contains(cast(IncomingValue)) && + any_of(IncomingValue->users(), [&Inductions](User *U) { + auto *P = dyn_cast(U); + return P && Inductions.contains(P); + }))) + continue; + ExitUsersToFix.insert(ExitIRI); + ExitIRI->addOperand(V); + } } } return ExitUsersToFix; @@ -8887,23 +8886,26 @@ addUsersInExitBlocks(VPlan &Plan, if (ExitUsersToFix.empty()) return; - auto *MiddleVPBB = Plan.getMiddleBlock(); - VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); - // Introduce extract for exiting values and update the VPIRInstructions // modeling the corresponding LCSSA phis. for (VPIRInstruction *ExitIRI : ExitUsersToFix) { + VPValue *V = ExitIRI->getOperand(0); // Pass live-in values used by exit phis directly through to their users in // the exit block. 
if (V->isLiveIn()) continue; - LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); - VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, - {V, Plan.getOrAddLiveIn(ConstantInt::get( - IntegerType::get(Ctx, 32), 1))}); - ExitIRI->setOperand(0, Ext); + for (VPBlockBase *PredVPB : ExitIRI->getParent()->getPredecessors()) { + auto *PredVPBB = cast(PredVPB); + VPBuilder B(PredVPBB, PredVPBB->getFirstNonPhi()); + + LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); + VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, + {V, Plan.getOrAddLiveIn(ConstantInt::get( + IntegerType::get(Ctx, 32), 1))}); + ExitIRI->setOperand(0, Ext); + } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d336827b23ddf3..661b100d7881c2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1815,9 +1815,13 @@ void VPlanTransforms::handleUncountableEarlyExit( auto *ExitingTerm = cast(Exiting->getTerminator()); BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); - VPIRBasicBlock *VPExitBlock; - VPExitBlock = VPIRBasicBlock::fromBasicBlock( - !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); + VPBasicBlock *VPExitBlock; + if (OrigLoop->getUniqueExitBlock()) { + VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); + } else { + VPExitBlock = VPIRBasicBlock::fromBasicBlock( + !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); + } VPValue *M = RecipeBuilder.getBlockInMask( OrigLoop->contains(TrueSucc) ? 
TrueSucc : FalseSucc); diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll deleted file mode 100644 index 5da97ef8f9b3bc..00000000000000 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll +++ /dev/null @@ -1,89 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s - -declare void @init(ptr) - -define i64 @multi_exiting_to_different_exits_with_store(ptr %p, i64 %N) { -; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count -; CHECK-NEXT: Live-in ir<128> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> -; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> -; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> -; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> -; 
CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.split -; CHECK-EMPTY: -; CHECK-NEXT: middle.split: -; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> -; CHECK-NEXT: Successor(s): ir-bb, middle.block -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> -; CHECK-NEXT: Successor(s): ir-bb, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: ir-bb -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] -; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N -; CHECK-NEXT: No successors -; CHECK-NEXT: } -entry: - %src = alloca [128 x i32] - call void @init(ptr %src) - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv - %l = load i32, ptr %gep.src - %c.1 = icmp eq i32 %l, 10 - br i1 %c.1, label %e1, label %loop.latch - -loop.latch: - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e2, label %loop.header, !llvm.loop !1 - -e1: - %p1 = phi i64 [ 0, %loop.header ] - ret i64 %p1 - -e2: - %p2 = phi i64 [ 1, %loop.latch ] - ret i64 %p2 -} - -!1 = distinct !{!1, !2, !3} -!2 = !{!"llvm.loop.vectorize.width", i32 4} -!3 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll new file mode 100644 index 00000000000000..13f5671f893651 --- /dev/null +++ 
b/llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s + +declare void @init(ptr) + +define i64 @multi_exiting_to_different_exits_live_in_exit_values() { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> +; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; 
CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header + +e1: + %p1 = phi i64 [ 0, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + +define i64 @multi_exiting_to_different_exits_load_exit_value() { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> 
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> +; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + %src = alloca [128 x i64] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv + %l 
= load i64, ptr %gep.src + %c.1 = icmp eq i64 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header + +e1: + %p1 = phi i64 [ %l, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + +define i64 @multi_exiting_to_same_exit_load_exit_value() { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> +; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; 
CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + %src = alloca [128 x i64] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv + %l = load i64, ptr %gep.src + %l.2 = load i64, ptr %gep.src + %c.1 = icmp eq i64 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e1, label %loop.header + +e1: + %p1 = phi i64 [ %l, %loop.header ], [ %l.2, %loop.latch ] + ret i64 %p1 +} From e849195a4994293aab10fb9786dc006064374ac0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 5 Nov 2024 20:48:45 +0000 Subject: [PATCH 08/19] !fixup address more comments. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 27 +- llvm/lib/Transforms/Vectorize/VPlan.h | 4 + llvm/lib/Transforms/Vectorize/VPlanCFG.h | 9 + .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 - .../Transforms/Vectorize/VPlanTransforms.cpp | 13 +- .../Transforms/Vectorize/VPlanTransforms.h | 13 +- .../LoopVectorize/X86/multi-exit-codegen.ll | 240 ----------------- .../X86/uncountable-early-exit-vplan.ll | 244 ------------------ .../uncountable-early-exit-vplan.ll | 171 ++++++++++++ 9 files changed, 218 insertions(+), 504 deletions(-) delete mode 100644 llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll delete mode 100644 llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll create mode 100644 llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5188aeed2f32b5..1c02db88c3f3c8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8839,11 +8839,7 @@ static SetVector collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { SetVector ExitUsersToFix; - for (VPBlockBase *VPB : vp_depth_first_shallow( - Plan.getVectorLoopRegion()->getSingleSuccessor())) { - if (VPB->getNumSuccessors() != 0 || VPB == Plan.getScalarHeader()) - continue; - auto *ExitVPBB = cast(VPB); + for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock(); for (VPRecipeBase &R : *ExitVPBB) { auto *ExitIRI = dyn_cast(&R); @@ -9178,14 +9174,31 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { RecipeBuilder.fixHeaderPhis(); if (Legal->hasUncountableEarlyExit()) { - VPlanTransforms::handleUncountableEarlyExit(*Plan, *PSE.getSE(), OrigLoop, - RecipeBuilder); + VPlanTransforms::handleUncountableEarlyExit( + *Plan, *PSE.getSE(), OrigLoop, 
Legal->getUncountableExitingBlocks(), + RecipeBuilder); } addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlocks( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); addUsersInExitBlocks(*Plan, ExitUsersToFix); + + // Currently only live-ins can be used by exit values. We also bail out if any + // exit value isn't handled in VPlan yet, i.e. a VPIRInstruction in the exit + // without any operands. + if (Legal->hasUncountableEarlyExit()) { + if (any_of(Plan->getExitBlocks(), [](VPIRBasicBlock *ExitBB) { + return any_of(*ExitBB, [](VPRecipeBase &R) { + auto VPIRI = cast(&R); + return VPIRI->getNumOperands() == 0 || + any_of(VPIRI->operands(), + [](VPValue *Op) { return !Op->isLiveIn(); }); + }); + })) + return nullptr; + } + // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 44fd49fed6ad90..34a3a180bfa52a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3832,6 +3832,10 @@ class VPlan { return cast(getScalarPreheader()->getSinglePredecessor()); } + /// Return the exit blocks of the VPlan, that is leaf nodes except the scalar + /// header. + auto getExitBlocks(); + /// The trip count of the original loop. 
VPValue *getTripCount() const { assert(TripCount && "trip count needs to be set before accessing it"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanCFG.h b/llvm/lib/Transforms/Vectorize/VPlanCFG.h index 89e2e7514dac2b..6ca388a953a6ff 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanCFG.h +++ b/llvm/lib/Transforms/Vectorize/VPlanCFG.h @@ -306,6 +306,15 @@ template <> struct GraphTraits { } }; +inline auto VPlan::getExitBlocks() { + VPBlockBase *ScalarHeader = getScalarHeader(); + return make_filter_range( + VPBlockUtils::blocksOnly( + vp_depth_first_shallow(getVectorLoopRegion()->getSingleSuccessor())), + [ScalarHeader](VPIRBasicBlock *VPIRBB) { + return VPIRBB != ScalarHeader && VPIRBB->getNumSuccessors() == 0; + }); +} } // namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLANCFG_H diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7f582ce8c99433..a1a0c2ffcf0597 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -872,7 +872,6 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, O << Indent << "IR " << I; if (getNumOperands() != 0) { - // assert(getNumOperands() == 1 && "can have at most 1 operand"); O << " (extra operand: "; printOperands(O, SlotTracker); O << ")"; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 661b100d7881c2..ed37e492b34fb1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1793,31 +1793,26 @@ void VPlanTransforms::createInterleaveGroups( void VPlanTransforms::handleUncountableEarlyExit( VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, + ArrayRef UncountableExitingBlocks, VPRecipeBuilder &RecipeBuilder) { auto *LatchVPBB = cast(Plan.getVectorLoopRegion()->getExiting()); VPBuilder Builder(LatchVPBB->getTerminator()); auto *MiddleVPBB = 
Plan.getMiddleBlock(); VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - VPValue *EarlyExitTaken = nullptr; - SmallVector ExitingBBs; - OrigLoop->getExitingBlocks(ExitingBBs); // Process all uncountable exiting blocks. For each exiting block, update the // EarlyExitTaken, which tracks if any uncountable early exit has been taken. // Also split the middle block and branch to the exit block for the early exit // if it has been taken. - for (BasicBlock *Exiting : ExitingBBs) { - if (Exiting == OrigLoop->getLoopLatch()) - continue; - + for (BasicBlock *Exiting : UncountableExitingBlocks) { auto *ExitingTerm = cast(Exiting->getTerminator()); BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); - VPBasicBlock *VPExitBlock; + VPIRBasicBlock *VPExitBlock; if (OrigLoop->getUniqueExitBlock()) { - VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); + VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); } else { VPExitBlock = VPIRBasicBlock::fromBasicBlock( !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index f7bbae25279fce..dc5dec2f1b84a6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -124,9 +124,16 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. static void removeDeadRecipes(VPlan &Plan); - static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, - Loop *OrigLoop, - VPRecipeBuilder &RecipeBuilder); + /// Update \p Plan to account for uncountable exit blocks in \p + /// UncountableExitingBlocks by + /// * updating the condition to exit the vector loop to include the early + /// exit conditions + /// * splitting the original middle block to branch to the early exit blocks + /// if taken.
+ static void + handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, + ArrayRef UncountableExitingBlocks, + VPRecipeBuilder &RecipeBuilder); }; } // namespace llvm diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll deleted file mode 100644 index 0c33715c6bd271..00000000000000 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll +++ /dev/null @@ -1,240 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization %s | FileCheck --check-prefix=MULTI %s -; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization=false %s | FileCheck --check-prefix=DEFAULT %s - -define i64 @multi_exit_with_store(ptr %p, i64 %N) { -; MULTI-LABEL: define i64 @multi_exit_with_store( -; MULTI-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; MULTI-NEXT: [[ENTRY:.*]]: -; MULTI-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; MULTI: [[VECTOR_PH]]: -; MULTI-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 -; MULTI-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; MULTI-NEXT: br label %[[VECTOR_BODY:.*]] -; MULTI: [[VECTOR_BODY]]: -; MULTI-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; MULTI-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; MULTI-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; MULTI-NEXT: [[TMP1:%.*]] = icmp uge <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; MULTI-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], -; MULTI-NEXT: [[TMP3:%.*]] = 
getelementptr i32, ptr [[P]], i64 [[TMP0]] -; MULTI-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0 -; MULTI-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP4]], i32 4, <4 x i1> [[TMP2]]) -; MULTI-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; MULTI-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP2]], -; MULTI-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; MULTI-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; MULTI-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; MULTI-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; MULTI-NEXT: br i1 [[TMP8]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; MULTI: [[MIDDLE_SPLIT]]: -; MULTI-NEXT: br i1 [[TMP6]], label %[[E1:.*]], label %[[MIDDLE_BLOCK:.*]] -; MULTI: [[MIDDLE_BLOCK]]: -; MULTI-NEXT: br i1 true, label %[[E2:.*]], label %[[SCALAR_PH]] -; MULTI: [[SCALAR_PH]]: -; MULTI-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; MULTI-NEXT: br label %[[LOOP_HEADER:.*]] -; MULTI: [[LOOP_HEADER]]: -; MULTI-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; MULTI-NEXT: [[CMP1:%.*]] = icmp uge i64 [[I_07]], [[N]] -; MULTI-NEXT: br i1 [[CMP1]], label %[[E1]], label %[[LOOP_LATCH]] -; MULTI: [[LOOP_LATCH]]: -; MULTI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[I_07]] -; MULTI-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -; MULTI-NEXT: [[INC]] = add nuw i64 [[I_07]], 1 -; MULTI-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], 128 -; MULTI-NEXT: br i1 [[CMP_NOT]], label %[[E2]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] -; MULTI: [[E1]]: -; MULTI-NEXT: ret i64 0 -; MULTI: [[E2]]: -; MULTI-NEXT: ret i64 1 -; -; DEFAULT-LABEL: define i64 @multi_exit_with_store( -; DEFAULT-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; DEFAULT-NEXT: [[ENTRY:.*]]: -; DEFAULT-NEXT: [[UMIN:%.*]] = call 
i64 @llvm.umin.i64(i64 [[N]], i64 127) -; DEFAULT-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[UMIN]], 1 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP4]], 4 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; DEFAULT: [[VECTOR_PH]]: -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4 -; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; DEFAULT-NEXT: [[TMP2:%.*]] = select i1 [[TMP5]], i64 4, i64 [[N_MOD_VF]] -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP2]] -; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] -; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[TMP0]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; DEFAULT-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 4 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br label %[[SCALAR_PH]] -; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; DEFAULT-NEXT: br label %[[LOOP_HEADER:.*]] -; DEFAULT: [[LOOP_HEADER]]: -; DEFAULT-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; DEFAULT-NEXT: [[CMP1:%.*]] = icmp uge i64 [[I_07]], [[N]] -; DEFAULT-NEXT: br i1 [[CMP1]], label %[[E1:.*]], label %[[LOOP_LATCH]] -; DEFAULT: [[LOOP_LATCH]]: -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[I_07]] -; DEFAULT-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[INC]] = add nuw i64 [[I_07]], 
1 -; DEFAULT-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], 128 -; DEFAULT-NEXT: br i1 [[CMP_NOT]], label %[[E2:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] -; DEFAULT: [[E1]]: -; DEFAULT-NEXT: ret i64 0 -; DEFAULT: [[E2]]: -; DEFAULT-NEXT: ret i64 1 -; -entry: - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %c.1 = icmp uge i64 %iv, %N - br i1 %c.1, label %e1, label %loop.latch - -loop.latch: - %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv - store i32 0, ptr %arrayidx - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e2, label %loop.header, !llvm.loop !1 - -e1: - ret i64 0 - -e2: - ret i64 1 -} - -define i64 @multi_exiting_to_same_exit_with_store(ptr %p, i64 %N) { -; MULTI-LABEL: define i64 @multi_exiting_to_same_exit_with_store( -; MULTI-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; MULTI-NEXT: [[ENTRY:.*]]: -; MULTI-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; MULTI: [[VECTOR_PH]]: -; MULTI-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 -; MULTI-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; MULTI-NEXT: br label %[[VECTOR_BODY:.*]] -; MULTI: [[VECTOR_BODY]]: -; MULTI-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; MULTI-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; MULTI-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; MULTI-NEXT: [[TMP1:%.*]] = icmp uge <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; MULTI-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], -; MULTI-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP0]] -; MULTI-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0 -; MULTI-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP4]], i32 4, <4 x i1> [[TMP2]]) 
-; MULTI-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; MULTI-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP2]], -; MULTI-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; MULTI-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; MULTI-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; MULTI-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; MULTI-NEXT: br i1 [[TMP8]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; MULTI: [[MIDDLE_SPLIT]]: -; MULTI-NEXT: br i1 [[TMP6]], label %[[E:.*]], label %[[MIDDLE_BLOCK:.*]] -; MULTI: [[MIDDLE_BLOCK]]: -; MULTI-NEXT: br i1 true, label %[[E]], label %[[SCALAR_PH]] -; MULTI: [[SCALAR_PH]]: -; MULTI-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; MULTI-NEXT: br label %[[LOOP_HEADER:.*]] -; MULTI: [[LOOP_HEADER]]: -; MULTI-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; MULTI-NEXT: [[C_1:%.*]] = icmp uge i64 [[IV]], [[N]] -; MULTI-NEXT: br i1 [[C_1]], label %[[E]], label %[[LOOP_LATCH]] -; MULTI: [[LOOP_LATCH]]: -; MULTI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]] -; MULTI-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -; MULTI-NEXT: [[INC]] = add nuw i64 [[IV]], 1 -; MULTI-NEXT: [[C_2:%.*]] = icmp eq i64 [[INC]], 128 -; MULTI-NEXT: br i1 [[C_2]], label %[[E]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] -; MULTI: [[E]]: -; MULTI-NEXT: [[P1:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[MIDDLE_SPLIT]] ] -; MULTI-NEXT: ret i64 [[P1]] -; -; DEFAULT-LABEL: define i64 @multi_exiting_to_same_exit_with_store( -; DEFAULT-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; DEFAULT-NEXT: [[ENTRY:.*]]: -; DEFAULT-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 127) -; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[UMIN]], 1 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule 
i64 [[TMP0]], 4 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; DEFAULT: [[VECTOR_PH]]: -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; DEFAULT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 4, i64 [[N_MOD_VF]] -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP2]] -; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] -; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[TMP3]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; DEFAULT-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br label %[[SCALAR_PH]] -; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; DEFAULT-NEXT: br label %[[LOOP_HEADER:.*]] -; DEFAULT: [[LOOP_HEADER]]: -; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; DEFAULT-NEXT: [[C_1:%.*]] = icmp uge i64 [[IV]], [[N]] -; DEFAULT-NEXT: br i1 [[C_1]], label %[[E:.*]], label %[[LOOP_LATCH]] -; DEFAULT: [[LOOP_LATCH]]: -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]] -; DEFAULT-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[INC]] = add nuw i64 [[IV]], 1 -; DEFAULT-NEXT: [[C_2:%.*]] = icmp eq i64 [[INC]], 128 -; DEFAULT-NEXT: br i1 [[C_2]], label %[[E]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] -; 
DEFAULT: [[E]]: -; DEFAULT-NEXT: [[P1:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ] -; DEFAULT-NEXT: ret i64 [[P1]] -; -entry: - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %c.1 = icmp uge i64 %iv, %N - br i1 %c.1, label %e, label %loop.latch - -loop.latch: - %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv - store i32 0, ptr %arrayidx - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e, label %loop.header, !llvm.loop !1 - -e: - %p1 = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] - ret i64 %p1 -} - -!1 = distinct !{!1, !2, !3} -!2 = !{!"llvm.loop.vectorize.width", i32 4} -!3 = !{!"llvm.loop.vectorize.enable", i1 true} -;. -; MULTI: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; MULTI: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; MULTI: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; MULTI: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; MULTI: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; MULTI: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -;. -; DEFAULT: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; DEFAULT: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; DEFAULT: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; DEFAULT: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; DEFAULT: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -;. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll deleted file mode 100644 index 13f5671f893651..00000000000000 --- a/llvm/test/Transforms/LoopVectorize/X86/uncountable-early-exit-vplan.ll +++ /dev/null @@ -1,244 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s - -declare void @init(ptr) - -define i64 @multi_exiting_to_different_exits_live_in_exit_values() { -; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count -; CHECK-NEXT: Live-in ir<128> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> -; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> -; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> -; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, 
vp<[[LATCH_CMP]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.split -; CHECK-EMPTY: -; CHECK-NEXT: middle.split: -; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> -; CHECK-NEXT: Successor(s): ir-bb, middle.block -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> -; CHECK-NEXT: Successor(s): ir-bb, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: ir-bb -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] -; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N -; CHECK-NEXT: No successors -; CHECK-NEXT: } -entry: - %src = alloca [128 x i32] - call void @init(ptr %src) - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv - %l = load i32, ptr %gep.src - %c.1 = icmp eq i32 %l, 10 - br i1 %c.1, label %e1, label %loop.latch - -loop.latch: - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e2, label %loop.header - -e1: - %p1 = phi i64 [ 0, %loop.header ] - ret i64 %p1 - -e2: - %p2 = phi i64 [ 1, %loop.latch ] - ret i64 %p2 -} - -define i64 @multi_exiting_to_different_exits_load_exit_value() { -; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count -; CHECK-NEXT: Live-in ir<128> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: 
Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> -; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> -; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> -; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.split -; CHECK-EMPTY: -; CHECK-NEXT: middle.split: -; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> -; CHECK-NEXT: Successor(s): ir-bb, middle.block -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> -; CHECK-NEXT: Successor(s): ir-bb, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: ir-bb -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] -; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N -; CHECK-NEXT: 
No successors -; CHECK-NEXT: } -entry: - %src = alloca [128 x i64] - call void @init(ptr %src) - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv - %l = load i64, ptr %gep.src - %c.1 = icmp eq i64 %l, 10 - br i1 %c.1, label %e1, label %loop.latch - -loop.latch: - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e2, label %loop.header - -e1: - %p1 = phi i64 [ %l, %loop.header ] - ret i64 %p1 - -e2: - %p2 = phi i64 [ 1, %loop.latch ] - ret i64 %p2 -} - -define i64 @multi_exiting_to_same_exit_load_exit_value() { -; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { -; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count -; CHECK-NEXT: Live-in ir<128> = original trip-count -; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): vector loop -; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %inc, 0, ir<1>, vp<[[VF]]> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: WIDEN ir<%c.1> = icmp uge ir<%iv>, ir<%N> -; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%p>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>, vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> -; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> -; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> -; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> -; 
CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.split -; CHECK-EMPTY: -; CHECK-NEXT: middle.split: -; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> -; CHECK-NEXT: Successor(s): ir-bb, middle.block -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> -; CHECK-NEXT: Successor(s): ir-bb, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: ir-bb -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] -; CHECK-NEXT: IR %c.1 = icmp uge i64 %iv, %N -; CHECK-NEXT: No successors -; CHECK-NEXT: } -entry: - %src = alloca [128 x i64] - call void @init(ptr %src) - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv - %l = load i64, ptr %gep.src - %l.2 = load i64, ptr %gep.src - %c.1 = icmp eq i64 %l, 10 - br i1 %c.1, label %e1, label %loop.latch - -loop.latch: - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e1, label %loop.header - -e1: - %p1 = phi i64 [ %l, %loop.header ], [ %l.2, %loop.latch ] - ret i64 %p1 -} diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll new file mode 100644 index 00000000000000..d840646a259529 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize 
-force-vector-width=4 -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s + +declare void @init(ptr) + +define i64 @multi_exiting_to_different_exits_live_in_exit_values() { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; 
CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-NEXT: } +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header + +e1: + %p1 = phi i64 [ 0, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + +define i64 @multi_exiting_to_different_exits_load_exit_value() { +; CHECK-NOT: VPlan 'Final VPlan for VF={4},UF={1}' { +entry: + %src = alloca [128 x i64] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv + %l = load i64, ptr %gep.src + %c.1 = icmp eq i64 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header + +e1: + %p1 = phi i64 [ %l, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + +define i64 @multi_exiting_to_same_exit_load_exit_value() { +; CHECK-NOT: VPlan 'Final VPlan for VF={4},UF={1}' { + +entry: + %src = alloca [128 x i64] + call void @init(ptr %src) + br label %loop.header + +loop.header: 
+ %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv + %l = load i64, ptr %gep.src + %l.2 = load i64, ptr %gep.src + %c.1 = icmp eq i64 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e1, label %loop.header + +e1: + %p1 = phi i64 [ %l, %loop.header ], [ %l.2, %loop.latch ] + ret i64 %p1 +} + +define i64 @multi_exiting_to_different_exits_induction_exit_value() { +; CHECK-NOT: VPlan 'Final VPlan for VF={4},UF={1}' { +entry: + %src = alloca [128 x i64] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv + %l = load i64, ptr %gep.src + %c.1 = icmp eq i64 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header + +e1: + %p1 = phi i64 [ %iv, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + + From c53eca6a59f55edc3dcb2bb692956de2fd97bfce Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 6 Nov 2024 20:40:41 +0000 Subject: [PATCH 09/19] !fixup remove left over canVectorizeEarlyExit --- .../Vectorize/LoopVectorizationLegality.h | 3 --- .../Vectorize/LoopVectorizationLegality.cpp | 19 ------------------- 2 files changed, 22 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index af6fae44cf0f09..dc7e484a40a452 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -287,9 +287,6 @@ class LoopVectorizationLegality { /// we can use in-order reductions. 
bool canVectorizeFPMath(bool EnableStrictReductions); - /// Returns true if the loop has an early exit that we can vectorize. - bool canVectorizeEarlyExit() const; - /// Return true if we can vectorize this loop while folding its tail by /// masking. bool canFoldTailByMasking() const; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index ed3808d2f30bf1..0267fb1adb16d6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1515,25 +1515,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { return true; } -bool LoopVectorizationLegality::canVectorizeEarlyExit() const { - // Currently only allow vectorizing loops with early exits, if early-exit - // vectorization is explicitly enabled and the loop has metadata to force - // vectorization. - - SmallVector Exiting; - TheLoop->getExitingBlocks(Exiting); - if (Exiting.size() == 1) - return false; - - LoopVectorizeHints Hints(TheLoop, true, *ORE); - if (Hints.getForce() == LoopVectorizeHints::FK_Undefined) - return false; - - Function *Fn = TheLoop->getHeader()->getParent(); - return Hints.allowVectorization(Fn, TheLoop, - true /*VectorizeOnlyWhenForced*/); -} - // Helper function to canVectorizeLoopNestCFG. bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath) { From e26af8e878c6484d1b7f95cb6f4c3c9966836fe2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 23 Nov 2024 12:56:49 +0000 Subject: [PATCH 10/19] !fixup address latest comments, thanks! 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 94 +++++------ llvm/lib/Transforms/Vectorize/VPlan.cpp | 15 +- llvm/lib/Transforms/Vectorize/VPlan.h | 2 + .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 - .../Transforms/Vectorize/VPlanTransforms.cpp | 61 +++---- .../Transforms/Vectorize/VPlanTransforms.h | 12 +- .../LoopVectorize/early_exit_legality.ll | 2 +- .../LoopVectorize/single_early_exit.ll | 157 ++++++++++++++++-- .../uncountable-early-exit-vplan.ll | 156 +++++++++++++++++ .../LoopVectorize/unsupported_early_exit.ll | 2 +- 10 files changed, 384 insertions(+), 121 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b59922fd3fd7c2..5134c4ec7d225c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7783,7 +7783,6 @@ DenseMap LoopVectorizationPlanner::executePlan( LoopVectorizeHints Hints(L, true, *ORE); Hints.setAlreadyVectorized(); } - TargetTransformInfo::UnrollingPreferences UP; TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE); if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue) @@ -8882,9 +8881,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { static SetVector collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { + auto *MiddleVPBB = Plan.getMiddleBlock(); SetVector ExitUsersToFix; for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { - BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock(); for (VPRecipeBase &R : *ExitVPBB) { auto *ExitIRI = dyn_cast(&R); if (!ExitIRI) @@ -8892,26 +8891,34 @@ static SetVector collectUsersInExitBlocks( auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); if (!ExitPhi) break; - for (BasicBlock *ExitingBB : predecessors(ExitBB)) { - if (!OrigLoop->contains(ExitingBB)) - continue; - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); - VPValue *V = 
Builder.getVPValueOrAddLiveIn(IncomingValue); - // Exit values for inductions are computed and updated outside of VPlan - // and independent of induction recipes. - // TODO: Compute induction exit values in VPlan. - if ((isa(V) && - !cast(V)->getTruncInst()) || - isa(V) || - (isa(IncomingValue) && - OrigLoop->contains(cast(IncomingValue)) && - any_of(IncomingValue->users(), [&Inductions](User *U) { - auto *P = dyn_cast(U); - return P && Inductions.contains(P); - }))) - continue; - ExitUsersToFix.insert(ExitIRI); - ExitIRI->addOperand(V); + for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors()) { + BasicBlock *ExitingBB = OrigLoop->getLoopLatch(); + if (PredVPBB != MiddleVPBB) { + SmallVector ExitingBlocks; + OrigLoop->getExitingBlocks(ExitingBlocks); + assert(ExitingBlocks.size() == 2 && "only support 2 exiting blocks"); + ExitingBB = ExitingBB == ExitingBlocks[0] ? ExitingBlocks[1] + : ExitingBlocks[0]; + } + Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); + VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); + // Exit values for inductions are computed and updated outside of VPlan + // and independent of induction recipes. + // TODO: Compute induction exit values in VPlan. + if ((isa(V) && + !cast(V)->getTruncInst()) || + isa(V) || + (isa(IncomingValue) && + OrigLoop->contains(cast(IncomingValue)) && + any_of(IncomingValue->users(), [&Inductions](User *U) { + auto *P = dyn_cast(U); + return P && Inductions.contains(P); + }))) { + if (ExitVPBB->getSinglePredecessor() == MiddleVPBB) + continue; + } + ExitUsersToFix.insert(ExitIRI); + ExitIRI->addOperand(V); } } } @@ -8919,24 +8926,28 @@ static SetVector collectUsersInExitBlocks( } // Add exit values to \p Plan. Extracts are added for each entry in \p -// ExitUsersToFix if needed and their operands are updated. -static void +// ExitUsersToFix if needed and their operands are updated. Returns true if all +// exit users can be handled, otherwise return false. 
+static bool addUsersInExitBlocks(VPlan &Plan, const SetVector &ExitUsersToFix) { if (ExitUsersToFix.empty()) - return; + return true; + + auto *MiddleVPBB = Plan.getMiddleBlock(); + VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); // Introduce extract for exiting values and update the VPIRInstructions // modeling the corresponding LCSSA phis. for (VPIRInstruction *ExitIRI : ExitUsersToFix) { - VPValue *V = ExitIRI->getOperand(0); // Pass live-in values used by exit phis directly through to their users in // the exit block. if (V->isLiveIn()) continue; - // Currently only live-ins can be used by exit values from blocks not exiting via the vector latch through to the middle block. + // Currently only live-ins can be used by exit values from blocks not + // exiting via the vector latch through to the middle block. if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB) return false; @@ -9218,31 +9229,19 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { RecipeBuilder.fixHeaderPhis(); if (Legal->hasUncountableEarlyExit()) { + assert(Legal->getUncountableExitingBlocks().size() == 1 && + "Only single uncountable exiting bock supported"); VPlanTransforms::handleUncountableEarlyExit( - *Plan, *PSE.getSE(), OrigLoop, Legal->getUncountableExitingBlocks(), + *Plan, *PSE.getSE(), OrigLoop, Legal->getUncountableExitingBlocks()[0], RecipeBuilder); } addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlocks( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) -<<<<<<< HEAD - - if (Legal->hasUncountableEarlyExit()) { - if (any_of(Plan->getExitBlocks(), [](VPIRBasicBlock *ExitBB) { - return any_of(*ExitBB, [](VPRecipeBase &R) { - auto VPIRI = cast(&R); - return VPIRI->getNumOperands() == 0 || - any_of(VPIRI->operands(), - [](VPValue *Op) { return !Op->isLiveIn(); }); - }); - })) - return 
nullptr; - } + if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) + return nullptr; -======= ->>>>>>> origin/main // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. @@ -9998,11 +9997,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (LVL.hasUncountableEarlyExit()) { if (!EnableEarlyExitVectorization) { reportVectorizationFailure("Auto-vectorization of loops with uncountable " - "early exit is not yet supported", + "early exit is not enabled", "Auto-vectorization of loops with uncountable " - "early exit is not yet supported", - "UncountableEarlyExitLoopsUnsupported", ORE, - L); + "early exit is no enabled", + "UncountableEarlyExitLoopsDisabled", ORE, L); return false; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index b99196961917eb..db8f0bfd60b4bc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -872,8 +872,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which is used when - // vectorizing loops with uncountable early exits + // vectorizing loops with uncountable early exits. const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); + assert(!isa(BackedgeTakenCountSCEV) && + "Invalid loop count"); ScalarEvolution &SE = *PSE.getSE(); const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, InductionTy, TheLoop); @@ -907,16 +909,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // 2) If we require a scalar epilogue, there is no conditional branch as // we unconditionally branch to the scalar preheader. Do nothing. // 3) Otherwise, construct a runtime check. 
- BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); - if (!IRExitBlock) { - // If there's no unique exit block (i.e. vectorizing with an uncountable - // early exit), use the block exiting from the latch. The other uncountable - // exit blocks will be added later. - auto *Term = cast(TheLoop->getLoopLatch()->getTerminator()); - IRExitBlock = TheLoop->contains(Term->getSuccessor(0)) - ? Term->getSuccessor(1) - : Term->getSuccessor(0); - } + BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); // The connection order corresponds to the operands of the conditional // branch. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 9d159b9adfba0a..00c69dd53a3635 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1232,6 +1232,8 @@ class VPInstruction : public VPRecipeWithIRFlags, // operand). Only generates scalar values (either for the first lane only or // for all lanes, depending on its uses). PtrAdd, + // Returns a scalar boolean value, which is true if any lane of its single + // operand is true. 
AnyOf, }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index cc816b0cba22fb..5527ed3c1fad08 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -68,8 +68,6 @@ bool VPRecipeBase::mayWriteToMemory() const { default: return true; } - case VPExpandSCEVSC: - return getParent()->getPlan()->getTripCount() == getVPSingleValue(); case VPInterleaveSC: return cast(this)->getNumStoreOperands() > 0; case VPWidenStoreEVLSC: @@ -165,8 +163,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPScalarCastSC: case VPReverseVectorPointerSC: return false; - case VPExpandSCEVSC: - return getParent()->getPlan()->getTripCount() == getVPSingleValue(); case VPInstructionSC: return mayWriteToMemory(); case VPWidenCallSC: { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index fce63c340230e3..80089fa4dfa334 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -509,12 +509,6 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { ReversePostOrderTraversal> RPOT( Plan.getEntry()); - for (VPRecipeBase &R : make_early_inc_range( - reverse(*cast(Plan.getPreheader())))) { - if (isDeadRecipe(R)) - R.eraseFromParent(); - } - for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly(RPOT))) { // The recipes in the block are processed in reverse order, to catch chains // of dead recipes. 
@@ -1809,8 +1803,7 @@ void VPlanTransforms::createInterleaveGroups( void VPlanTransforms::handleUncountableEarlyExit( VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, - ArrayRef UncountableExitingBlocks, - VPRecipeBuilder &RecipeBuilder) { + BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) { auto *LatchVPBB = cast(Plan.getVectorLoopRegion()->getExiting()); VPBuilder Builder(LatchVPBB->getTerminator()); @@ -1818,36 +1811,34 @@ void VPlanTransforms::handleUncountableEarlyExit( VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); VPValue *EarlyExitTaken = nullptr; - // Process all uncountable exiting blocks. For each exiting block, update the - // EarlyExitTaken, which tracks if any uncountable early exit has been taken. - // Also split the middle block and branch to the exit block for the early exit - // if it has been taken. - for (BasicBlock *Exiting : UncountableExitingBlocks) { - auto *ExitingTerm = cast(Exiting->getTerminator()); - BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); - BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); - VPIRBasicBlock *VPExitBlock; - if (OrigLoop->getUniqueExitBlock()) { - VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); - } else { - VPExitBlock = VPIRBasicBlock::fromBasicBlock( - !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); - } + // Process the uncountable exiting block. Update EarlyExitTaken, which tracks + // if any uncountable early exit has been taken. Also split the middle block + // and branch to the exit block for the early exit if it has been taken. + auto *ExitingTerm = + cast(UncountableExitingBlock->getTerminator()); + BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); + BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); + VPIRBasicBlock *VPExitBlock; + if (OrigLoop->getUniqueExitBlock()) { + VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); + } else { + VPExitBlock = VPIRBasicBlock::fromBasicBlock( + !OrigLoop->contains(TrueSucc) ? 
TrueSucc : FalseSucc); + } - VPValue *M = RecipeBuilder.getBlockInMask( - OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); - auto *N = Builder.createNot(M); - EarlyExitTaken = Builder.createNaryOp(VPInstruction::AnyOf, {N}); + VPValue *M = RecipeBuilder.getBlockInMask( + OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); + auto *N = Builder.createNot(M); + EarlyExitTaken = Builder.createNaryOp(VPInstruction::AnyOf, {N}); - VPBasicBlock *NewMiddle = new VPBasicBlock("middle.split"); - VPBlockUtils::disconnectBlocks(LoopRegion, MiddleVPBB); - VPBlockUtils::insertBlockAfter(NewMiddle, LoopRegion); - VPBlockUtils::connectBlocks(NewMiddle, VPExitBlock); - VPBlockUtils::connectBlocks(NewMiddle, MiddleVPBB); + VPBasicBlock *NewMiddle = new VPBasicBlock("middle.split"); + VPBlockUtils::disconnectBlocks(LoopRegion, MiddleVPBB); + VPBlockUtils::insertBlockAfter(NewMiddle, LoopRegion); + VPBlockUtils::connectBlocks(NewMiddle, VPExitBlock); + VPBlockUtils::connectBlocks(NewMiddle, MiddleVPBB); - VPBuilder MiddleBuilder(NewMiddle); - MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {EarlyExitTaken}); - } + VPBuilder MiddleBuilder(NewMiddle); + MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {EarlyExitTaken}); // Replace the condition controlling the exit from the vector loop with one // exiting if either the original condition of the vector latch is true or any diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index dc5dec2f1b84a6..98f30c13c7e9b4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -127,13 +127,13 @@ struct VPlanTransforms { /// Update \p Plan to account for uncountable exit blocks in \p /// UncountableExitingBlocks by /// * updating the condition to exit the vector loop to include the early - /// exit conditions + /// exit conditions /// * splitting the original middle block to branch to the early exit blocks - 
/// if taken. Returns false if the transformation wasn't successful. - static void - handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, - ArrayRef UncountableExitingBlocks, - VPRecipeBuilder &RecipeBuilder); + /// if taken. Returns false if the transformation wasn't successful. + static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, + Loop *OrigLoop, + BasicBlock *UncountableExitingBlock, + VPRecipeBuilder &RecipeBuilder); }; } // namespace llvm diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll index 21433477c1d7a3..f7fd55a0f039a3 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; REQUIRES: asserts -; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 -disable-output 2>&1 | FileCheck %s declare void @init_mem(ptr, i64); diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 52f82d007de4df..08a333fa865154 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s declare void @init_mem(ptr, i64); @@ -11,21 +11,47 @@ define i64 @same_exit_block_phi_of_consts() { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) 
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP1]] ], [ 1, [[LOOP_INC]] ], [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[MIDDLE_SPLIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -62,19 +88,45 @@ define i64 @diff_exit_block_phi_of_consts() { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] 
= getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label 
[[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: loop.early.exit: ; CHECK-NEXT: ret i64 0 ; CHECK: loop.end: @@ -119,22 +171,66 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) ; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023 +; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[END]] to i10 +; CHECK-NEXT: [[TMP20:%.*]] = zext i10 [[TMP19]] to i64 +; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP20]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]] +; CHECK-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD3]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP16]], label [[FOUND:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], 
i64 [[GEP_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]] +; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND]], label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 ; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY1]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: found: ; CHECK-NEXT: ret i32 1 ; CHECK: exit: @@ -183,14 +279,33 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[SVAL]], 0 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 276 +; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP1]], label [[EARLY_EXIT:%.*]], label 
[[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 266, [[MIDDLE_BLOCK]] ], [ -10, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT:%.*]] +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT]] ; CHECK: for.inc: ; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: early.exit: ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable @@ -218,3 +333,15 @@ early.exit: for.end: ret i32 0 } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll index d840646a259529..9df40d77ecc5fd 100644 --- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -114,6 +114,162 @@ e2: ret i64 %p2 } +define i64 @multi_exiting_to_same_exit_live_in_exit_values() { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT 
branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operand: ir<1>, ir<0>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-NEXT: } + +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %exit, label %loop.header + +exit: + %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i64 %p +} + +define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() { +; CHECK-LABEL: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = 
getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operand: ir<1>, ir<0>) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-NEXT: } + +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %exit, label %loop.header + +exit: + %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + 
ret i64 %p + +; uselistorder directives + uselistorder label %exit, { 1, 0 } +} + + define i64 @multi_exiting_to_same_exit_load_exit_value() { ; CHECK-NOT: VPlan 'Final VPlan for VF={4},UF={1}' { diff --git a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll index cd91d07120f9ee..5b2a95f1b368c3 100644 --- a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s declare void @init_mem(ptr, i64); From 06c3d39f4f2ce65569ec71fef8d15ab36aa2f24b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 23 Nov 2024 15:40:10 +0000 Subject: [PATCH 11/19] [VPlan] Print incoming VPBB for phi VPIRInstruction (NFC). 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 +++- .../RISCV/vplan-vp-intrinsics-reduction.ll | 8 ++++---- .../first-order-recurrence-chains-vplan.ll | 18 +++++++++--------- ...t-order-recurrence-sink-replicate-region.ll | 14 +++++++------- .../interleave-and-scalarize-only.ll | 2 +- .../Transforms/LoopVectorize/vplan-printing.ll | 10 +++++----- 7 files changed, 30 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 00c69dd53a3635..fc7e574a7d88cb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -643,7 +643,7 @@ class VPBlockBase { virtual void dropAllReferences(VPValue *NewValue) = 0; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void printAsOperand(raw_ostream &OS, bool PrintType) const { + void printAsOperand(raw_ostream &OS, bool PrintType = false) const { OS << getName(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5527ed3c1fad08..57649e0233f9d5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -869,7 +869,9 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, if (getNumOperands() != 0) { O << " (extra operand: "; - printOperands(O, SlotTracker); + getOperand(0)->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[0]->printAsOperand(O); O << ")"; } } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 414f39d557044a..ab541f6fa94e66 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -60,7 +60,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; 
IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: ir-bb: -; IF-EVL-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; IF-EVL-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; IF-EVL-OUTLOOP-NEXT: No successors ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: scalar.ph: @@ -110,7 +110,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb: -; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: @@ -156,7 +156,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: ir-bb: -; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: @@ -202,7 +202,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: ir-bb: -; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index bcacfb358ec05a..517de8be5c9987 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -48,8 +48,8 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) -; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors @@ -125,9 +125,9 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) -; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) -; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) +; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2 from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 
%iv.next, 1000 ; CHECK-NEXT: No successors @@ -205,8 +205,8 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] -; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) -; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) +; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors ; CHECK-NEXT: } ; @@ -279,8 +279,8 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] -; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) -; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) +; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 8ae538cf63986b..d0c811763a522a 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -85,7 +85,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; 
CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -172,7 +172,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -235,7 +235,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -244,7 +244,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: IR %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 @@ -355,7 +355,7 @@ define void 
@sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -452,7 +452,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %C = icmp sgt i32 %iv.next, %recur.next ; CHECK-NEXT: No successors @@ -539,7 +539,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK: IR %ec = icmp ugt i64 %iv, 3 ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index a71666d8c3167a..dd58dc81ccedde 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -227,7 +227,7 @@ exit: ; DBG-EMPTY: ; DBG-NEXT: ir-bb: ; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] 
-; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]>) +; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) ; DBG: IR %ec = icmp slt i32 %iv.next.trunc, %n ; DBG-NEXT: No successors ; DBG-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 6bb20a301e0ade..195f6a48640e54 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -171,7 +171,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %red.next.lcssa = phi float [ %red.next, %for.body ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %red.next.lcssa = phi float [ %red.next, %for.body ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -476,7 +476,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %muladd.lcssa = phi float [ %muladd, %for.body ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %muladd.lcssa = phi float [ %muladd, %for.body ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -716,7 +716,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[EXIT]]>) +; CHECK-NEXT: IR %lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[EXIT]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -1111,7 +1111,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb, 
scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %for.1.lcssa = phi i16 [ %for.1, %loop ] (extra operand: vp<[[FOR_RESULT]]>) +; CHECK-NEXT: IR %for.1.lcssa = phi i16 [ %for.1, %loop ] (extra operand: vp<[[FOR_RESULT]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -1119,7 +1119,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]>) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors From 552bd9161f07ace9083c3ab409219add05aaf2bc Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 23 Nov 2024 18:59:09 +0000 Subject: [PATCH 12/19] !fixup update recipe printing --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 ++++++---- .../LoopVectorize/uncountable-early-exit-vplan.ll | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 57649e0233f9d5..761f2949126e49 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -868,10 +868,12 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, O << Indent << "IR " << I; if (getNumOperands() != 0) { - O << " (extra operand: "; - getOperand(0)->printAsOperand(O, SlotTracker); - O << " from "; - getParent()->getPredecessors()[0]->printAsOperand(O); + O << " (extra operand" << (getNumOperands() > 1 ? 
"s" : "") << ": "; + interleaveComma(enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { + Op.value()->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[Op.index()]->printAsOperand(O); + }); O << ")"; } } diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll index 9df40d77ecc5fd..2583070fc9820a 100644 --- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -41,7 +41,7 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; CHECK-NEXT: Successor(s): ir-bb, middle.block ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0>) +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0> from middle.split) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: @@ -50,7 +50,7 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1>) +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: @@ -152,7 +152,7 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values() { ; CHECK-NEXT: Successor(s): ir-bb, middle.block ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operand: ir<1>, ir<0>) +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: @@ -228,7 +228,7 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() { ; CHECK-NEXT: Successor(s): ir-bb, 
middle.block ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operand: ir<1>, ir<0>) +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: From 00dea4a96e5b9cf863d30f3f5f06aab6acbd21a7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 23 Nov 2024 19:16:35 +0000 Subject: [PATCH 13/19] !fixup add dbg message --- .../Vectorize/LoopVectorizationLegality.cpp | 15 +++++++++++---- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 ++++++- .../LoopVectorize/early_exit_legality.ll | 6 +++--- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 0267fb1adb16d6..0dcf5ecec13b0f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1375,10 +1375,17 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence( } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { - // When vectorizing early exits, create predicates for all blocks, except the - // header. - if (hasUncountableEarlyExit() && BB != TheLoop->getHeader()) - return true; + // When vectorizing early exits, create predicates for the latch block. The + // early exiting block must be a direct predecessor of the latch at the + // moment. 
+ BasicBlock *Latch = TheLoop->getLoopLatch(); + if (hasUncountableEarlyExit()) { + assert( + getUncountableExitingBlocks().size() == 1 && + is_contained(predecessors(Latch), getUncountableExitingBlocks()[0]) && + "Uncountable exiting block must be a direct predecessor of latch"); + return BB == Latch; + } return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5134c4ec7d225c..bbd560c75f5afc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9239,8 +9239,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { SetVector ExitUsersToFix = collectUsersInExitBlocks( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) + if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) { + reportVectorizationFailure( + "Some exit values in loop with uncountable exit not supported yet", + "Some exit values in loop with uncountable exit not supported yet", + "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop); return nullptr; + } // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll index f7fd55a0f039a3..2a99693523d3cf 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll @@ -11,7 +11,7 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check' ; CHECK: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to 
i32))) ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-NOT: LV: Not vectorizing: entry: %p1 = alloca [1024 x i32] %p2 = alloca [1024 x i32] @@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-NOT: LV: Not vectorizing entry: %p1 = alloca [1024 x i8] %p2 = alloca [1024 x i8] @@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet. 
entry: %p1 = alloca [1024 x i8] call void @init_mem(ptr %p1, i64 1024) From 7b8866d8329d53e55239b7f824a2e2f0b186da1c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 24 Nov 2024 07:45:12 +0000 Subject: [PATCH 14/19] !fixup fix formatting --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 761f2949126e49..7d13f437e934c8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -869,11 +869,12 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, if (getNumOperands() != 0) { O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": "; - interleaveComma(enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { - Op.value()->printAsOperand(O, SlotTracker); - O << " from "; - getParent()->getPredecessors()[Op.index()]->printAsOperand(O); - }); + interleaveComma( + enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { + Op.value()->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[Op.index()]->printAsOperand(O); + }); O << ")"; } } From b9ee73961bc3a1983bf3cc26d595e2acf751183e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 8 Dec 2024 20:51:51 +0000 Subject: [PATCH 15/19] !restore test checks. 
--- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 + .../LoopVectorize/X86/multi-exit-cost.ll | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index a639f806e8e6ee..92b0de4abbbda8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const { case Instruction::Or: case Instruction::ICmp: case Instruction::Select: + case VPInstruction::AnyOf: case VPInstruction::Not: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index 3517b5c484e37d..4e768074019d31 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -5,18 +5,18 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-LABEL: define i64 @test_value_in_exit_compare_chain_used_outside( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 range(i64 1, 32) [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[N]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]] -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[X]]) -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[UMIN]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] +; CHECK-NEXT: [[UMIN2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[X]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[UMIN2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK: [[VECTOR_SCEVCHECK]]: -; 
CHECK-NEXT: [[TMP32:%.*]] = add nsw i64 [[N]], -1 -; CHECK-NEXT: [[TMP33:%.*]] = freeze i64 [[TMP32]] -; CHECK-NEXT: [[UMIN1:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP33]], i64 [[X]]) -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN1]] to i1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[UMIN1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[N]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]] +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[X]]) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[UMIN]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: From 95f427629d84a772742bf0f23251bbd2beecc119 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 10 Dec 2024 12:24:52 +0000 Subject: [PATCH 16/19] !fixup address most comment, a few more pending --- .../Vectorize/LoopVectorizationLegality.h | 5 +++++ .../Vectorize/LoopVectorizationLegality.cpp | 13 +++++++------ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 17 +++++++++++------ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index dc7e484a40a452..fbe80eddbae07a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -395,6 +395,11 @@ class LoopVectorizationLegality { /// Returns the uncountable early exiting block. 
BasicBlock *getUncountableEarlyExitingBlock() const { + if (!HasUncountableEarlyExit) { + assert(getUncountableExitingBlocks().empty() && + "Expected no uncountable exiting blocks"); + return nullptr; + } assert(getUncountableExitingBlocks().size() == 1 && "Expected only a single uncountable exiting block"); return getUncountableExitingBlocks()[0]; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 0dcf5ecec13b0f..555c8435dd330d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1375,14 +1375,13 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence( } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { - // When vectorizing early exits, create predicates for the latch block. The - // early exiting block must be a direct predecessor of the latch at the + // When vectorizing early exits, create predicates for the latch block only. + // The early exiting block must be a direct predecessor of the latch at the // moment. BasicBlock *Latch = TheLoop->getLoopLatch(); if (hasUncountableEarlyExit()) { assert( - getUncountableExitingBlocks().size() == 1 && - is_contained(predecessors(Latch), getUncountableExitingBlocks()[0]) && + is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) && "Uncountable exiting block must be a direct predecessor of latch"); return BB == Latch; } @@ -1799,13 +1798,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { HasUncountableEarlyExit = false; if (isa(PSE.getBackedgeTakenCount())) { + HasUncountableEarlyExit = true; if (!isVectorizableEarlyExitLoop()) { + UncountableExitingBlocks.clear(); + HasUncountableEarlyExit = false; if (DoExtraAnalysis) Result = false; else return false; - } else - HasUncountableEarlyExit = true; + } } // Go over each instruction and look at memory deps. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9b3bae20c174b8..75c3d5128f2d83 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3662,13 +3662,15 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { // Start with the conditional branches exiting the loop. If the branch // condition is an instruction contained in the loop that is only used by the - // branch, it is uniform. + // branch, it is uniform. Note conditions from uncountable early exits are not + // uniform. SmallVector Exiting; TheLoop->getExitingBlocks(Exiting); for (BasicBlock *E : Exiting) { + if (Legal->hasUncountableEarlyExit() && TheLoop->getLoopLatch() != E) + continue; auto *Cmp = dyn_cast(E->getTerminator()->getOperand(0)); - if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() && - (TheLoop->getLoopLatch() == E || !Legal->hasUncountableEarlyExit())) + if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) AddToWorklistIfAllowed(Cmp); } @@ -7857,7 +7859,7 @@ DenseMap LoopVectorizationPlanner::executePlan( ILV.printDebugTracesAtEnd(); - // 4. Adjust branch weight of the branch in the middle block. + // 4. Adjust branch weight of the branch in the middle block if it exists. if (ExitVPBB) { auto *MiddleTerm = cast(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); @@ -8248,8 +8250,11 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // If source is an exiting block, we know the exit edge is dynamically dead // in the vector loop, and thus we don't need to restrict the mask. Avoid - // adding uses of an otherwise potentially dead instruction. - if (!Legal->hasUncountableEarlyExit() && OrigLoop->isLoopExiting(Src)) + // adding uses of an otherwise potentially dead instruction unless we are + // vectorizing a loop with uncountable exits. In that case, we always + // materialize the mask. 
+ if (OrigLoop->isLoopExiting(Src) && + Src != Legal->getUncountableEarlyExitingBlock()) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition()); From c3d3b390ab75ee201688bea179b07ad311e8a9d9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 10 Dec 2024 13:46:43 +0000 Subject: [PATCH 17/19] !fixup address remaining comments, thanks! --- .../Transforms/Vectorize/LoopVectorize.cpp | 24 +++-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 10 +-- .../Transforms/Vectorize/VPlanTransforms.cpp | 69 ++++++++------- .../Transforms/Vectorize/VPlanTransforms.h | 8 +- .../uncountable-early-exit-vplan.ll | 88 ------------------- 5 files changed, 55 insertions(+), 144 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 75c3d5128f2d83..b47386389c82c1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9294,12 +9294,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); - if (Legal->hasUncountableEarlyExit()) { - assert(Legal->getUncountableExitingBlocks().size() == 1 && - "Only single uncountable exiting bock supported"); + if (auto *UncountableExitingBlock = + Legal->getUncountableEarlyExitingBlock()) { VPlanTransforms::handleUncountableEarlyExit( - *Plan, *PSE.getSE(), OrigLoop, Legal->getUncountableExitingBlocks()[0], - RecipeBuilder); + *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); } addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlocks( @@ -10178,15 +10176,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } - if (LVL.hasUncountableEarlyExit()) { - if (!EnableEarlyExitVectorization) { - reportVectorizationFailure("Auto-vectorization of loops with uncountable " - "early exit is not enabled", - "Auto-vectorization of loops with uncountable 
" - "early exit is not enabled", - "UncountableEarlyExitLoopsDisabled", ORE, L); - return false; - } + if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) { + reportVectorizationFailure("Auto-vectorization of loops with uncountable " + "early exit is not enabled", + "Auto-vectorization of loops with uncountable " + "early exit is not enabled", + "UncountableEarlyExitLoopsDisabled", ORE, L); + return false; } // Entrance to the VPlan-native vectorization path. Outer loops are processed diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index bf86bd2bb1ad52..81c76bc99fbf74 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -861,7 +861,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, auto Plan = std::make_unique(Entry, VecPreheader, ScalarHeader); // Create SCEV and VPValue for the trip count. - // We use the symbolic max backedge-taken-count, which is used when + // We use the symbolic max backedge-taken-count, which works also when // vectorizing loops with uncountable early exits. const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); assert(!isa(BackedgeTakenCountSCEV) && @@ -901,8 +901,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // 3) Otherwise, construct a runtime check. BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); - // The connection order corresponds to the operands of the conditional - // branch. + // The connection order corresponds to the operands of the conditional branch. VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); @@ -1047,10 +1046,7 @@ void VPlan::execute(VPTransformState *State) { // Move the last step to the end of the latch block. This ensures // consistent placement of all induction updates. 
Instruction *Inc = cast(Phi->getIncomingValue(1)); - if (VectorLatchBB->getTerminator() == &*VectorLatchBB->getFirstNonPHI()) - Inc->moveBefore(VectorLatchBB->getTerminator()); - else - Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); + Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); // Use the steps for the last part as backedge value for the induction. if (auto *IV = dyn_cast(&R)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 84c46c726d62ce..5406c7be3eac7b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1842,50 +1842,57 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { void VPlanTransforms::handleUncountableEarlyExit( VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) { - auto *LatchVPBB = - cast(Plan.getVectorLoopRegion()->getExiting()); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *LatchVPBB = cast(LoopRegion->getExiting()); VPBuilder Builder(LatchVPBB->getTerminator()); auto *MiddleVPBB = Plan.getMiddleBlock(); - VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - VPValue *EarlyExitTaken = nullptr; + VPValue *IsEarlyExitTaken = nullptr; - // Process the uncountable exiting block. Update EarlyExitTaken, which tracks - // if any uncountable early exit has been taken. Also split the middle block - // and branch to the exit block for the early exit if it has been taken. - auto *ExitingTerm = + // Process the uncountable exiting block. Update IsEarlyExitTaken, which + // tracks if the uncountable early exit has been taken. Also split the middle + // block and have it conditionally branch to the early exit block if + // IsEarlyExitTaken. 
+ auto *EarlyExitingBranch = cast(UncountableExitingBlock->getTerminator()); - BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0); - BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1); - VPIRBasicBlock *VPExitBlock; + BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0); + BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1); + + // The early exit block may or may not be the same as the "countable" exit + // block. Creates a new VPIRBB for the early exit block in case it is distinct + // from the countable exit block. + // TODO: Introduce both exit blocks during VPlan skeleton construction. + VPIRBasicBlock *VPEarlyExitBlock; if (OrigLoop->getUniqueExitBlock()) { - VPExitBlock = cast(MiddleVPBB->getSuccessors()[0]); + VPEarlyExitBlock = cast(MiddleVPBB->getSuccessors()[0]); } else { - VPExitBlock = VPIRBasicBlock::fromBasicBlock( + VPEarlyExitBlock = VPIRBasicBlock::fromBasicBlock( !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); } - VPValue *M = RecipeBuilder.getBlockInMask( + VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask( OrigLoop->contains(TrueSucc) ? 
TrueSucc : FalseSucc); - auto *N = Builder.createNot(M); - EarlyExitTaken = Builder.createNaryOp(VPInstruction::AnyOf, {N}); + auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond); + IsEarlyExitTaken = + Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); VPBasicBlock *NewMiddle = new VPBasicBlock("middle.split"); - VPBlockUtils::disconnectBlocks(LoopRegion, MiddleVPBB); - VPBlockUtils::insertBlockAfter(NewMiddle, LoopRegion); - VPBlockUtils::connectBlocks(NewMiddle, VPExitBlock); - VPBlockUtils::connectBlocks(NewMiddle, MiddleVPBB); + VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle); + VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock); + std::swap(NewMiddle->getSuccessors()[0], NewMiddle->getSuccessors()[1]); VPBuilder MiddleBuilder(NewMiddle); - MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {EarlyExitTaken}); - - // Replace the condition controlling the exit from the vector loop with one - // exiting if either the original condition of the vector latch is true or any - // early exit has been taken. - auto *Term = dyn_cast(LatchVPBB->getTerminator()); - auto *IsLatchExiting = Builder.createICmp( - CmpInst::ICMP_EQ, Term->getOperand(0), Term->getOperand(1)); - auto *AnyExiting = - Builder.createNaryOp(Instruction::Or, {EarlyExitTaken, IsLatchExiting}); + MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken}); + + // Replace the condition controlling the non-early exit from the vector loop + // with one exiting if either the original condition of the vector latch is + // true or the early exit has been taken. 
+ auto *LatchExitingBranch = + dyn_cast(LatchVPBB->getTerminator()); + auto *IsLatchExitTaken = + Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0), + LatchExitingBranch->getOperand(1)); + auto *AnyExiting = Builder.createNaryOp(Instruction::Or, + {IsEarlyExitTaken, IsLatchExitTaken}); Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExiting); - Term->eraseFromParent(); + LatchExitingBranch->eraseFromParent(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ee5481718cbfbb..f7b09c1311ef7e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -124,12 +124,12 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. static void removeDeadRecipes(VPlan &Plan); - /// Update \p Plan to account for uncountable exit blocks in \p - /// UncountableExitingBlocks by - /// * updating the condition to exit the vector loop to include the early + /// Update \p Plan to account for the uncountable early exit block in \p + /// UncountableExitingBlock by + /// * updating the condition exiting the vector loop to include the early /// exit conditions /// * splitting the original middle block to branch to the early exit blocks - /// if taken. Returns false if the transformation wasn't successful. + /// if taken. 
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, BasicBlock *UncountableExitingBlock, diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll index c0f684dc96a747..c45634913ce0b2 100644 --- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -87,35 +87,6 @@ e2: ret i64 %p2 } -define i64 @multi_exiting_to_different_exits_load_exit_value() { -; CHECK: multi_exiting_to_different_exits_load_exit_value -; CHECK-NOT: VPlan 'Final VPlan for VF={4},UF={1}' { -entry: - %src = alloca [128 x i64] - call void @init(ptr %src) - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv - %l = load i64, ptr %gep.src - %c.1 = icmp eq i64 %l, 10 - br i1 %c.1, label %e1, label %loop.latch - -loop.latch: - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e2, label %loop.header - -e1: - %p1 = phi i64 [ %l, %loop.header ] - ret i64 %p1 - -e2: - %p2 = phi i64 [ 1, %loop.latch ] - ret i64 %p2 -} - define i64 @multi_exiting_to_same_exit_live_in_exit_values() { ; CHECK: multi_exiting_to_same_exit_live_in_exit_values ; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' { @@ -272,62 +243,3 @@ exit: ; uselistorder directives uselistorder label %exit, { 1, 0 } } - - -define i64 @multi_exiting_to_same_exit_load_exit_value() { -; CHECK: multi_exiting_to_same_exit_load_exit_value -; CHECK-NOT: VPlan 'Final VPlan for VF={4},UF={1}' { - -entry: - %src = alloca [128 x i64] - call void @init(ptr %src) - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv - %l = load i64, ptr %gep.src - %l.2 = load i64, ptr %gep.src - %c.1 = icmp eq i64 %l, 10 - br i1 %c.1, label 
%e1, label %loop.latch - -loop.latch: - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e1, label %loop.header - -e1: - %p1 = phi i64 [ %l, %loop.header ], [ %l.2, %loop.latch ] - ret i64 %p1 -} - -define i64 @multi_exiting_to_different_exits_induction_exit_value() { -; CHECK: multi_exiting_to_different_exits_induction_exit_value -; CHECK-NOT: VPlan 'Final VPlan for VF={4},UF={1}' { -entry: - %src = alloca [128 x i64] - call void @init(ptr %src) - br label %loop.header - -loop.header: - %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] - %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv - %l = load i64, ptr %gep.src - %c.1 = icmp eq i64 %l, 10 - br i1 %c.1, label %e1, label %loop.latch - -loop.latch: - %inc = add nuw i64 %iv, 1 - %c.2 = icmp eq i64 %inc, 128 - br i1 %c.2, label %e2, label %loop.header - -e1: - %p1 = phi i64 [ %iv, %loop.header ] - ret i64 %p1 - -e2: - %p2 = phi i64 [ 1, %loop.latch ] - ret i64 %p2 -} - - From a875249d84db6e192633e7e9c62b46df0a171624 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 10 Dec 2024 14:05:43 +0000 Subject: [PATCH 18/19] !fixup update doc --- llvm/docs/Vectorizers.rst | 13 +++++++++++ llvm/docs/vplan-early-exit.dot | 41 +++++++++++++++++++++++++++++++++ llvm/docs/vplan-early-exit.png | Bin 0 -> 131943 bytes 3 files changed, 54 insertions(+) create mode 100644 llvm/docs/vplan-early-exit.dot create mode 100644 llvm/docs/vplan-early-exit.png diff --git a/llvm/docs/Vectorizers.rst b/llvm/docs/Vectorizers.rst index a4462e53edda09..f134a6df94a69a 100644 --- a/llvm/docs/Vectorizers.rst +++ b/llvm/docs/Vectorizers.rst @@ -399,6 +399,19 @@ small trip counts. .. image:: epilogue-vectorization-cfg.png +Early Exit Vectorization +^^^^^^^^^^^^^^^^^^^^^^^^ + +When vectorizing a loop with a single early exit, the loop blocks following the +early exit are predicated and the vector loop will always exit via the latch. 
+If the early exit has been taken, the vector loop's successor block +(``middle.split`` below) branches to the early exit block. Otherwise +``middle.block`` selects between the exit block from the latch or the scalar +remainder loop. + +.. image:: vplan-early-exit.png + + Performance ----------- diff --git a/llvm/docs/vplan-early-exit.dot b/llvm/docs/vplan-early-exit.dot new file mode 100644 index 00000000000000..63490b0cdb2e43 --- /dev/null +++ b/llvm/docs/vplan-early-exit.dot @@ -0,0 +1,41 @@ +digraph VPlan { +graph [labelloc=t, fontsize=30; label=""] +node [shape=rect, fontname=Courier, fontsize=30] +edge [fontname=Courier, fontsize=30] +compound=true + N1 [label = + "vector.ph" + ] + N1 -> N2 [ label="" lhead=cluster_N3] + subgraph cluster_N3 { + fontname=Courier + label="\ vector loop" + N2 [label = + "vector.body" + ] + } + N2 -> N4 [ label="" ltail=cluster_N3] + N4 [label = + "middle.split" + ] + N4 -> N5 [ label=""] + N4 -> N6 [ label=""] + N5 [label = + "early.exit" + ] + N6 [label = + "middle.block" + ] + N6 -> N9 [ label=""] + N6 -> N7 [ label=""] + N7 [label = + "scalar.ph" + ] + N7 -> N8 [ label=""] + N8 [label = + "loop.header" + ] + N9 [label = + "latch.exit" + ] +} diff --git a/llvm/docs/vplan-early-exit.png b/llvm/docs/vplan-early-exit.png new file mode 100644 index 0000000000000000000000000000000000000000..3cd293bcdbcc822e6f7e7a6d36582fbea2cc4744 GIT binary patch literal 131943 zcmeFZWmuG9*EI|yD4?hah%|}X{iUw7#P^a7#LVPIA_5p z)o*vCz(1Id%98gm^1CUP!G9i@Jd!q*lfz&HKjUCv`dVOMp}zwDkbyrK7-y0&G0uR0 zG0~qTL4N-hyEy60@1L=D&|e&K(`>`Q5XF#wAg1~hb8Qm0mU#Q5<+~l;?J({)4E{Gn z9^&-L-(!pr^3N4P)^|h-Sp~d-wq3oUHEP1-VSN5&W^R~F=Ls@l`b2wi%5Kqhgto0{HCB;cNM~GK_Jl%zs0`!n$fE<5^PMY%c2`Rqh z8=ClQ+-Y23hr6pSQ*`(@{){tG1_o^Ht03zR9wwH|1O1o8>TFz=aE7qdFKj=LVz>xv zI61E2<-f=JclxsBz+jgtmC#~cW{`x9+tzelAmhxWxD;H-l*Nz zXI&@keJ%Rm`!DyUN;X%kp4wtbBiK^R%4=qR$EP;sw-X4p_mO=jjSJpd7tgdTv_s=h z7{SE4!VJF|KpBA~zz|P4w zc%fvGd+S|EVm_U(Njb9BXU(uKXVSsc^EA~45=MR>fzC#sZ;1x4edVX#Is4<$U@2Fw 
z2|g{XIypu%9q#On+r?!hdX06)3k<_^AD0;NP18IR-NkWQ8|mwE6LQ;{TN%CXA@+A@ zlcc~sk+k_Ik^hW8L~)JN-X}J6veAXw6$?FycDm4AwSg>^jQOD=aMTu4;nx7BShEDT zW(LG=sl`BNkI0F&7iw3-aXvz0!L_9{_-`Z=g<*qp?_(8u{2YS-HT|htw}pa+mU^3@7IxLk4`>8VHr6oz&RZd5u9YQM#6$AOT_>4i6H#n4 zkzvZ5dWxUFsYVZP6esfMFOpthU1iaJbWieQ^8ZNUL^UAI7>eBAx%6`cZxG>-<>|bb zOndl0VlXZVO=$c#LMg<5Nc1pe@CBU}v*_9Te@FD6pRS;h!Wa6`jlWCd-TBC)oI5RL?n_j5d5E0pdbh)m>)dD-sGRt|d!MloINgkD3$Hd$th1Ti&RYG^^#cSdV&EvwuKq5#Y6P<1LvmQp=PbVL z_Ci+||7xz0%ZR}_1d6bMm-%W=W1{!5g|63u$@cEBUd^ztaLCF~p4Rt+(IO+slBTod zL{^Apl0RupGzRx_D@zGs_m>;bL@jX83sg3iv#l1u3Df2NwB$>cWrOODK53IRt~>%8 zl}0lzmI#0t*zM|eBgq+#R|^^-s~S3wOHAbk(jJnOEGBqVHx*~ErXC;lNm9q0qk7sX zh?sC(0Jjq2G=XRAx;oT#;Tl`xT5$)9k}KsyXnYQM?g_!|PL2f>Vwxv-*4Vm2x6VD? zd37jVqd?bmrV(aZ(#4%q+BIsHXtPnXDV*9^xw5x0Ij-%oUS=AhrkU1ztz=33d9PWjXCh8!mCM5#UWb(aS7ZC@`5Ws+OS#d6x)`7P0j4I&EcQU?p~ zwRp8qx>hDi$4s5Cu8OQ?+2Ol?YNsUWL-web@9v5J_{>jwrl^HV+<@_iDRE*H13vpU>UlxkwM z+!Lu~yF7wXw+({9i&fF!vboyz^6}<=1U}D1Z`DGq)w`GHN%*-h&yOyEPeP4CP9{CO z-|;0Nd)+9xtw$P&1NUpdg@=m7%%^Al4(msuh{<>mbDG^haWllLomh)bRZw$Ru8fe= zJcg?07B!P~?<@~M$K?+LVmQoRxfJo7yqoqy=Jj7+-ChGz=W@8Wz6#Gv z-p3my-V6E@%@m;!qibO{CwS@T4J>`75GG^fQ^@D)_VtthMy7njD~S-Q^$Ny*hcu2} zhPh1Ih(CQkyyg(@Y~7A3%R-3jKimHqQXT-my;LN;s;b5cCJ zQY`DM@8lB%`Euyi#If$1DPi*6+}>_eDmQdo#&`MFPFwFR@*h8hF((&3_;hcSMyEx zzkM0xmeJa@5^Ays(TqGTrY)TTzh|b&Cn$gPJZmi*>*l!i;b-o@L3f^3l%~eBn@chP z19Iy|k}9S>koI}ZUIEy@m1pGL!|^e0hQ?fq3WI>x#Ihx+H6K;=@`E96X7B8q9Xe836fauteh!8N zS7+IwR`YAR@wp|WYi7rzdjd#wpEg;8NMH)G(ni%@R;2xRkgM*2yB`V7`_@LE7!6kE za90CWx|bp)c1Z;L{VXn-yv_t6fvxMPtq#@=w;{RLyaCGhZ`b+S^6P-$ovxmDgp$s$ zAE(EiKz;6W75Fe-=kMIjxA>`#!sb*uxU5NYP(t!jSd|ftWYVrD`D4s7Fl(0LSU0B^ z;b>Evn509~Dx6Wf!!-wf+oYf5*A*}g=D*c1r35MmIviW76h{j~53$b1IZjE7p25?6 zc^gl{g#6RR0k0AMc4q4V;um>Tj}%gCBhH~JXM+R7?+X?7J&Pb~%AaWRrE)hPLI$p) za*w{Rk5`1;@{8v;4jN+pd}X&pj)$Dn!dhEwEj$Yf8Cb%NOAzviP4qgvUB2Jc+y3RJ+#~vD zT$QDR9-ZvZhD=6L!7dGW2i++>(CZ?QqzEhbU7B98_eeuA7~=}u&gj2I^iU~@Z8&9C z{jydT^@qsjAaZiK4;6c3RyB)yF!3OB4LrXnJ+D!7Rfp&lCxXDM%1 
zqh{2u{oQQHt!$(1tvJ-)c+ZGnn{G~t?$t$xgIs|yU;K!K#in!A7f_#k^K(3}%88C8 z-#^Fm`L*!C8^!R1xo;mY@o<=S`gwNIf7xH6{hJK!L1D?#y?w6$UC`)Z2H75il`|%g^yeSMLc*z? z;Yzks%V+V)Di3^!lW?}#<}PQufXXwCkbNfC5(#rN z6(`3p6OKLjb<1>d)`r#2s|yQ$wg8-ovlz}3MO91?UmIkY^(30dVU5&xp=c5a3b;u4 zDPO0i+~eat9u3atqpTto5N8>G$8npAs%0~F9TAXxL;Bp%>6|t(A6~Zo#h1Do=7jxu zS64+}N1R8p-!Upv z(8>&emTemz=R4em3nMdzb9-jp@0LeAdr8MNsMoq+t?Lcno5_RY_t}h)+`LO8o)g_5 zS`k}Pb6?&>!-{SI=VFT1!^0?JsIqP1S_GWpvdXu=51#Fz5J=X|6h^8QH_a-x7!Ew+ z-IIXnanyG-?0-GqGBtx!7IN3<>q&{_XJU<=i3c=PLtztHxgR*05lQU zOAl88g~j|BFKTB%sN(kE%h6iu9j_s?H(hNTBT4y2cEYOP?&#Kz6j?Y@2rQ?U49E*G zFUIkUs5OlBO+SXd8Y3r7Mi|(Q@VQR8nThM6>Wkj)@XN(<<693eOzn*e7If^!gcX;( zg)BPjasCDVikuK5%aQ!oMcz0U%UJQyt6ua1BzDOzL}>4cRxxbt9o_TuM`55{s!N;CbgyR zu!a3kZ3%mGHFj~tqNTTO%R z?Tm#;9H78v3_~RMb$Z#P?-@B3N!Siso#7jeyO4lVSbqprO8js&$XeEB6;Wo?5*#e2 zcez};$GvzGAX4@XrZqq7o_N*AnO4o4(Q9%q8OzRZ8_^N!AQc3^Kd_Fk*{nyh>lU~a zw~g#&ygHIFM#htSaJ2RL>B~HY719>lG!FFBcgj*ncujV%;JkoJN!%*M`A zm5ht39O`lZW?{o-?Y8_#x?xU>-`WU+1x0Ro|yz;S-_4 zj?JUgUE8YptJAv%8s_hlP|S;ub@AA7{nX62O}kJgi!FYTb3m3&KsZUX+Db* z8n#M4WCtYU^zJfKc*`SNCm)&da+^tlwf2VihhcdV`*ua2uU2uYD36a}#G$Z&Q1j%LZ`^J|;k+3!uOd?oH(^xcX{bQ!aF%#&-gOZ1=@ z$5iP0UBgd)xri@i$q7Wg9^$FU)X3!fzGPi_GaADCGiMsq?))rqe^ih(>VUy0a_GN? 
zo=aZLG;{Nb$|AdSL^H~w0^=!?oy1bRj5)c*0ev?^lsanNwqJKut=GgT%QMVNrTgE8 zl7D-Y+|tL9c2|ikQugg))^gih`kE|eL6=z;O#n%{-b=r|C(^t*EN@x${4JhD8eRB9 zX4c_?nC{emqX%+*mz$N}y1Q&nO;%*nz1s08*tZRz#nTyPt^M{~DHdDC3oCrARTwkW zaaLy1q3u=_R(YzZ3mli$hphI?EAk0dVKL{Up;3Cs>*u3hpIN!W^q`Ta3gFN@P=8y` zk6Z*3%D|2pG%}cJ-jE{LftPKn6|OOuUh!?>D9Ps{dk`7dmNrT0H&Ec=ck<*WX>+I6 z`s`&nw_A~Oeo%ywVqnR2FP@z0Lq8&!bX((v^-V6>Uc7z7KyF?bm z>?_;!!8$~N*3l-wudxfY)q2i0e7+rIy;%B0&wlHg_43E%jixbJ{7y^yYoDTHvR~?Y zuViJMow}fO841ck{7GF!ah(FCEpLyqv>wWado<$;ujc>aR{QgpTZ6U2_dq6rLGa8~ zOC=Y%RrJAopUBzo>U&R3w|AyOC35)U{0RA3G_Rs7atCy@4}@GY@>zWU)S zP~dyyca%Ok+*z(ybKhAq7}B(2PUXZuc)GhbI^a63dDnGgqH)*d>%&wygO^F;GfWr{ z@lpBt8qbMJ`;H*S0=1k-QciQl5xOfntSY*?2a7cqD0tbPkf@}~;%XBe(#auSwCQJD$qeSvJd5zrxBEPyS%STI)~lxXTY@QROW#an>v?{EX}3t+d#T-E zy~f}ZaW&~zgszIp0aaL@zXZo*sQ*(bRmA0`!JMd;{-;+#9c){V-xb5rVs86f`(f`~ zxxu(?ZxT!?-}*;l{8eARBmzW|6!b^EIVUD67MK)%u42OB;d^Q7M7IS}9{rU;wfVU& z?kBOgFRzs~tY~w4A0IVNn&2s$u3o7eU}TCAskNJ~Us%a0;J*Lr!d*P>h=;g%#&)&e zoB1-7ab`!res9FxIZ80Z8E}c_kChF7;|&AYY2JoDM#BBv0CsT zX!+LF;T#QCbH|~1hHvW?(}(3rec%C}??r5ix;RrBRc)HZI}f#yive2c%y=+uC}rgt z_0#Mv4TycX{EwZ8!BugX3TJ}_Vv}k!d8R!bGtMu3-0JHNV zTO(5@&bnB7CZB{wd#R@WorDDP#+=*d{Ax`n>dk~z zE<8297{}WnCgM%h0t`kMEZq)q-8x~4z%vFQl%t(57rM7R&|I0bbpKMcKFCm_8PCbB-D+Q0N)6f5B(`|`mQD_`+5l=k*y{N@ zhrF^OytNM{J;LOb!pls~K}5w^IwEAZP=HogsPjL1qgM%5xPH8vqvu;Do9R#bvED8w~ss-H{uDsQbu=HPm#*Qe z!oDxJm8ptPAMd1`c35>(xZN#)v^@r-R2-n!x?G{j8Vr}G0sT5{E1MuVDw`-g=?cA~ zj)lGA<8WW2rN|jjpjdMIyl6ND=e6Ha7+x4DGJ?h_Uv>cG8bU{}n`!FlEIvTrKcX;Y@pOcSQ_Y+|1b$@QBcHog6H+86UDz zQ}1x!TU)M)moUP}#&1&`NwNuTfKf z{5iMqIU*4ca5`B^mM`u_dZ7-nH;w@d$g^TLF%JSbH=5?rF98*+#>}CEc9xSkm`}a; zs}jS}{iL_QX|<-rT}2@gJP)H-x~jgjC{)SuCQu70CaSQyTtz@e=_py6s8OX$s0Z&t7VHIADmIf{ju7Sm92LRLk>FS1~ zt-w*E5FtbS=Zq-vL}93`V(_;h?y=UTWo#J}%({XdA~E>ZpoZSOvlDdWcqu(LzbeTw zF^d`d0~hWl8hSlaa~{(5Sa0OdeehQqN}2%6Hfo%7@hO35k~Q{o2dxZ9g;!P4%c}_U zHMoCo=4z5)VToCf%Ndd7)Mk}&Q>0qcO*y)X-65?SOt5YmraA$lu#s=_#Lf@_VOFt= z3EFCzleeIyftoUj(&_ZMB)p_WzlJx@IV|T7!osvGAvRtFJ6(P;V9^e^ox$`W^Us%j 
z1%t^0^c@GEd9wBFzB%{hYtHiQ#Bvf@;g`hbL9)Nw{`q*)bsAgkeAxzI_j%CjjEvS5 z)$l7VtgO&pm$dqg&m?-ChnJl{W?c)IY5_?Ak3w#JSV4zJr`qXm%oI;`vI}53_qjeg zoC-wk>L3t}Cb(auz{F~o6Fta`2>6gA8^iuN-gvzpxv{NjL)1yZrvGfQTARf3!MfNv zN}`d@7r2B!WGX86rQ}BU7^xgl^h+9%^2sj9N4Vf)6UCGWL%cU-JtT{5$}LD zfTF;4uFCeTdH56f7N`&_`3i|c@ns~FT)@uVZggT+LQsZJ=>u?Tdl=4O!&p)Nyoao8 zB2SCQaahOIZDF?_)HCaA3d8Z=R5>n^rMdQ-q08pT?`-mX^F$IxW~wQWziEUI56;-) zvk(o?I1Hp(Hj~!S>m8K!65m`lCfus#v#{|-%y+En+tZ?SSJF9gV?`BH4TxojAK16g zPJ;STak3A`1bb<-YLSZ2L^VUt*1RB|Q4}?#W%olBvns2RZC*hzS47rF4n*utw*!#d;#os33_q z9k*qbY*#^$XyY{Rr7?1j*OGIIAyeBdgFW}hd&)HE=U~P(L!HhL|3YFCnKmJ9cb`Wp zTRtJSernT0=z0=Grx)!qDnUXqHYSs{%UOp>M;3sYqw2R6^Si6VJTlU6<@_B}K3nhU zIg`&1pp$i%^RUihI&J@_RA#MmC;I*HB3s?ASgsCq?qR^$bhXVX&p`~hEyim&7j5=f zA}Xf69QC{+N_B@%01mcYfoIKXbU7XXrE~%=Uwsy}<-^1kntvY^{KzPX>nT~2%A1$J zaUTZ69tU3NjS*6Wj~|qJckG8DfXx(WJGOYQiKm|} zVi35s0CL120O#ZeJa^lR@^X(6H^RzT}o?ux)5VFVI(yjGL68iT#Z$#G3WrEgNY0RV{-0Z((?5r z_rnXkv#&WO$S;fSe$_=Jo*dCTwI^gFhs;gW{M$6#j=s0ZZBEr~4|-<0I2%2rE%#9- zP9y8@;+vf7ccz}-ma|!OQ#CkknNgqr1Su4ni$Eq8Izh)zavt=^hOAYFad3auy+qaN;ZEUji`B#=#a zbCep>i#l4{evv|*U-95xwZ1%Fb+pyNu>RP&T4nZos*412HU*^LQO9S4Y3Lq}>wC7(B74&0WZ|TP&U5#))tC1J=5&gkiTYJ9o&^1UzC`wZ`aSqR8U)N=dH5HyOblBH&?^zq0(K%QBpW_75cf$_ zgSIDPFNHsFYI-#C-U6jYgGrhd2eruw3c<4{;b#HhdFBEa!TOt95AO}`=DZJ^@Df*; zVIY=f5npFQ;hpkO!gxX1)RHBwg@))5tniOJGd`Ct4i13Ig~#d1EmfcEjJIhrwTD1X zz*Q1RU>q9;3CeuR`vi(8+Sk@pOE91JQJ<+W?TW49B(7YSVahX>=U=|bQzpuS`JplX zeV*#rl}UK3hONnDZST!c8S#`j=F!M3Oox%ICT`arR8Rv})7COMKwJ_b@x5S{Gciwv1A!g2;; z`TSc;P>PoC%TJ&0g;|}9QvIDTLGXl8i*AdVAY*r8%L}Y?mb5eGU@@9=mQUaGhLJZH zD!=Wsz?uVS$(AU(Q_^CjFgcF)QIk~=M5FLdcVo%x2!vKs4FjR1DZhG9`mk|WyGWj3 zse$CMnI%uByPRzX!D=j=EjT5q0n*(rp65?yLq!@%OB6+a$SU$cH%F>b4Q>d^dp$a*7#!k+EFhn{kF+vG**Wj&=HXnb@jPP6 zEg3r$pjgN!zkCrdaz$bQ0R73nUG79z%k7l?>f^)JAHs6{>z$V*x=(tN^SPk)?+>&2 ztJ9rpt4YnalWR7$pjXXrXx5!`AU((>hX7^T^}S<5{v_9ubYZw5bxovLQf!pOKo5ZAQK%wM_Z{&1_}+)l!nr1Dt%GNc}w&#z4JVofv6 z63C$yafWkK9-CY;caxIZ0jDw9(!`~_1IQ%~yriotO1cI?F9vZe=k-ybSnGO@eSV?< 
z@2usERERXlt*&~S<)fx?YPCe4ZW|XhI8KUYO3*M>^4m=ba(EwmBGT%;jt>?i4(mT( z)w)&}o?6`tMK~fsN!SUBBD1vz&KuvCqTw2-+{U`#hqRR{1*%Cs1$CY*Q4J9PYOnAP zZK#}%NOhz#OclB51sgYBKD2{>cQz z9}QJ;0b&%$bWdr)%;zJTHqTZO#k?9+JwUrbrj|*Nfw!_;bW*7-`3Wj0O)1e3M218tedhQNj_Qo>7dNI2H4$o;v(g`6Ei> z55X#a`*diD)KzH*z`W84YJ}w+ z2Xyrb6tTnn;efF+t2j`oN25&wptQVI$ij5`4rtJ&?PygI@t-FLlp8?n9H7|qD0%Yb zgJPXGs7>z;lJhydw-_tU0tTPpEMWV}w8{L(O#cPgG_Onr#Qwc|T#{?To==x(T;TWP z1zhuE*iB^MU6~<&w;KlJIv5mR(UiZ;D{!I##^#cnP$=*K@D;2N@cy|k49Gm11k>&j zZ298?3QrcTu$|m;hv#XQwOb)iX`atStI(4EFh`I5AEm>D;R*0|#-i2r`PS9p0y}_d z3u;T{QvcpFPIwK*TcJoKapU*Dhu!RbFHM-ba|5te>}WO$_;D1vB^v;IiUS!k2J_6> zXnyCFxd|%#f2fcxux6ro3~>KEfhg{I^9Os4urreyxYTsMXlno00ix2lfJS>SPk+wm z?}z{V6rKQh-+qA%3W7fe{quVoMP+cghbvAFY`;(X@9&?9E~5>9b&l->r|9HUOBgUV zx4`)R|GkManH6*1Yrl5^ja(AoAQr%6N^4!|sQ|(ymzGVLnqW}h-#PzrpP~|A9g_NI z{=Ok;V)OCxJW?Tdr`$=q!E9o&wMXI>|Mms6-VwD7>y&{1+C{JzcxdTjFJUpYYeYcW z=bvE^^+!K^5n=n6UnAbw0GX_I<^XP||k3$n$*9h!mRzU2EcRm9Y0uG?hJRTY^xemekL|bUw_U0*B z3h|%kgFOHvEe~H#DFH`v!oJV8kk^6wA=DEF*e?1L8eYhsul^X~WXSug(o6oS@nrt2dj8(lOG275sV{4zzmtS&33^azH1sc8nkhKE4{#sISjn#l*T_Ab>~D4O{Hn_Ajs zz=C)jZ+j7~1EE(1(L2E9yD<1Mh;1t3E2H`3Q^|#f37q?JQPJ<%aZ^nn7$xu`Sd6>4 zhB<-Us-=ACt=FG`_v4ZOKf%BN>%;5PZd1cnHe~w6e{TJms1rDUU)RX*?Q?~tH$#DX zAX6!|F%^e!?V|Owh`+=5gjy!v&QG|~_;*a|S$#{l7ge)-7~(?1tb>_^A8 z!yShwzfLnlf0?M2F3&HK?6hVI_?i#sh5uT>pa?0Uurd+->+Nud_02j&+h;IjWQ5F@?9KUe&6Efn}xs% zsqT~kvW#hJ5O?-|jNR#J1=AjIAFKoWcjw%HqgZTo_ra6T2?fkJ(0Y`88xWKvIt@#g z(eoq9jt%mjvE^X432^qCn|8m8>GVk&{&8yYTfL{MV( z7BM5}ay0b8FmRA)KM~vA2(SA$OR8ZdxUbxXjH=1*6Z{xXSS1z!JW)5-f1T#vWrYsi zbK+obbRSs|{TcpKcZ>dvh6SuWd+GlFv77#$NmLhoYcC@t{+W`~r~7(^CY18OHvtCr zza#N`b^UiF{*FW@vBv$N9bp*bnk_?gpEF zBL;{4*H7B9Y=F^Z5Q489qSZ_ zK*H=e+;RMd6CV8RIefF>#7lhU5Ksufr*dG{dj>!uwW|x9b zfh9I@n)9>Wx&)#$XJO#T7Ru|~pj(2&ZOfR$ZmPC5?-+0kVHE~v(9!HxA!vxmS_vlTvmjyUCb(2gwBK<|qUi>{l;HU#|iMQSt`5;*B$}^x9 zjUCVs>|{!EXc`vp78~%GIs*PvV@;QDA4nY*9IhMMDkY{}0rQ?dqHnM@D{O5*bBPrd zu!i2GJ`4S8*psRR*!@MKXmtpv1O*S5Qn89UcIO-?aESc>cGTJZ?x;hUjyo^`kPX4M 
zgl8I1dF>l;00wItSjj6DL~lc*5~MQuPadp%%dh+z0I%@;1uuRC@v9(+{{ZZRwBH64 zL`dhe;I#EXNy$!#zq+iOE*qo#UMOcGkI!-88g?0HY#vZ4A~d!L0coKry8L>_$xkDO z%jzo3n)O=bEp^eRHMF$~`jXF&+^)6{5c|{y*~;hshJ2VJgy}`L_$`nnqZ@&e(g`SC zvudYhL*Sq>2donL06Oi0rt6qn4D$N+KBbVu7E@`~5C}d7yRgYJEN}8v-x_ zy8}{`8LXKON7c6xEHm5uss*wEwD1DByjT)F&mzMLJX;0OS%wx5h&$TXh*-$AyD4LCXz6gf{~@cIphMoXVW8Y?oGsxZoj_}5kt{lz z>nKBCfZm1BZREKLltm=}K!PM$keHR}=wm<11~*<3InvLs+2H2@7Kl_)YsDBrw_;M> zC;EyJZ*(#+_CEO&PL@)}YUcsGkq+>)DS*X{0Y+XxqVPoC1r8Z>tCAzoOQV72JVXH& znd$DJ1|qKqxHzN|fE#IjjuKz!XIuO4HI2vG=(qME@r?VkXH4G!B@+w!%_ii;?|SZk zdo5;d123~0E*@6Fs4?qFFzsNjxD!iLE-!=FnB+)^1L9;XM>p?BIjF-{8$IayJD{88 zKHUua`x@;^Aaw90I0si{VgWIm3*CMf3+VS)a15W*)T;B)fz8z?Y2{}#$f9C(fLr?n(!W$7Ii1c!^k^#(P8t4K(%#K@-`FjLy!}x zF$RFW!QY+^XAXVVpCAa{aj*c|t?mMHp-Nyh)z^Ol#9)gfHD#>Iwb@{=@uyfFxPlqM<_8-Q}|0w8wrtcv&H zGCjII&6EO_hf7F2vocvzfefj6PU2;9d~~po;IVNR-6BT5ghQTbGmsuc%5Ln3)>%-o zEHU$VUQ4?m#?;(?umQZ?soPi5f#bew4R|wbdpCeS&k0(|3Ll$9sC}^39CF?fj^ng= zLz6Wz-}}yXMW{9=}G2$Z@3`S_{n}{ool1&(Y{X zX|Dn9Uf}yWD9F?;ra@0)c@>B&uOYM9IX$Il2_K<&EA0gJ4N&o*HEfy$&1_5N&65oN zY66Wf5gQH`9-n2Y#y$EP$XdN=A?NolYsqy42u8*id1s!La^8Ass5OL5IPtGVAL@yT4EcHunL-oF00Yk= z|50;t*uI5QqZFD5s8yqrFV5B&X()OqodVETL zxa$cw+r|h%*y9R2$?tr?R{F{gX`ffvBs49I_EFid(>V$3CEjfb&ULlMXsys&UZg>J z2wdq$%OFN}G%u#~s??4^^FD=+*0SxWr`?%sm=n72=|(CxtVt=f1fRv9(CC%%DESC< zxJKKBD;@Y&=2u6&N-819e$DohY zikG`C%5FS&Kk*s9x_d$eaO{*Z_`P|ueola8gc?~; zEx6Xxem;M%0!TQ8ap4bygn~sp_n$a|g&SLB)DjCQr7?8(A-rh$t~WUink|bF;MW=s zP`;yhOk4jo&q?U5ewuJPI@22(8lRsr+Z8&nU9Om;@ivs|*mJc*#Ok^n3l7;{1O)XICU>?Zy zAP?u_*0-^4{48hpMmQ0c@Z>uaSst3PM&@1A&JTM0B`z+qvzM$`RA8pGqNH}|QLCUo?NONpVi7nR7boiR;ZMsc7e5%1hw1E&taV*< z#p(x*xKyJ9=^0PfW~+kb?3Btzi&3NUg`}f*QSJ)R24JnkCL7zqE?SdB3V=*^2BGGh zHZ=~Sjf*u>fzolXfIG2#8>kc-lhAIF@itq^G972{!SCBH<2EGOC+yt|7O|BfO$$jb zJ39~GeNNY~|61#X3VuxMu#Q#E=PU#@PM!8TiqwF9l}Hu0>Ah3F+C8<@wmc#Ooi(Hg zi+^u5y?V&7cRWsB95$u5*PLZ!cY)9S&BsmXyDkTzWMexx-yt&l@-38OAi?NM-ZN`x z%0x8j1$J|;{t0gqpk*#XVrf=@XU>&gUp4qKvc+m?h_l@m-R!lE!CTC6iTjORDnAAeV5i 
zqCW3>sUPp}t&i6vmW)y<46bu(Cce1H7eyh>b^eZKY3l@dYKah~l}q0l#6#jPf!1VmNQxh*= zNG}K_1#w?*mfdOBkjqTP2++_ZD0BQoAbCwdlVi2`|+Zl5jkm~~4`dR{#SVQ|RygW^H8T8lM7r4n?h_LDVl+7JtHM8LE zG`f(4i0@>4GFuuEtLgx%8wIVeCwan-z-jX61kb|CDOHxI#Ob4SOKL%^>K2G64)j!m zbEz`pGd!dknkozL9J}vwi_{nUc=93G^pG2EPQFkts zuom#|;bI_64MuvD3%F||N3};lpLnAOMS`htQG_H(LbsS5>L>^NE&w| z*;!e!%UK)ibo~i@-EdNPA{d)NYh9uvshg8RGml7N$#oz9=0#n zAty1B`pj?v#gE2bi}i;JHboO)Y53ir;h+j>AYsoaat9AAh?sr?$U+ zGM8X%l&FhTpT6){Q}0fApu>aJ^mp+X!;|WKpXZrfu_d^a?^25=s(rrF8BB|js?=xj zd<2vSE_RQW76K&Lkn@W}12pY}9_XfC9%l|6kwcyL+fM=fk`UI(vT8}MayYY$7qD8F z-dG*rkb<0_AM8TgY6WKTn?P+<#n+jLY97eR`q2#@gp%*rBCQ zMlilLvOLH0MHq#-BRh8 z(Uw@FRHghQ)f%-1+h4uDQ^O$UtdG#sdXCGjx11fKAlH69QSWeW9JiRgyg+y)?o8OY zH&BEqkT+$T_iUc@qOC?t?>0e?FN0Zb-QDQl4ZbIPuk@Cw(! zMp)p(PFFgn{aO&D!PGNq6_HxkL|{q1m;TO9U^Kr_A>rx9_}N*G1-@ZtJG+3V8;8Ko z>$)a)yJf0ZCm#w;SALEia9z@+zX0_fp{F6motSp0lQeC1I6f*YG&e%7K}<6K^>ZL) zoI{(^MNa`5!zx7jv{^hQDt>TtI)GAg{}sh3L18Ak0R@C|PS{4IeSES|` zmEgpduB6A=wp)r+6A;0@^GZ^cODqMTv;v0wvtFESR^cf<_6QAfBE&Fs4GZcb0TChle7_1M&oeb+RVG~nlk%BjQVoj#71 zmzb%^XSV#cC?hU%cAOBkM$~ zB*uaL!6_e5J_HTM9#s&s`=T$Xmw8WN@l$x_dUJo=_68TF!9c_sT98+q# z6=%is?UFb4P4=#GTk26KqGvKZQYZD`6*UnO{X~$oaLDLwk$91y8{M4!yeRGW^?IPT zqKr$wF{t|3Ga^dje#V7tufZU4?E8+))mf=M&$8E7&Eg_I9)OfVyLm4)1~n$TF3FSr zmBFEyu-AQ0q>Ob}9Ezu99q_Jk4{i&(+$fPa?jv+Ut8Fd?b$jiV1q?9j9?CRkIM-W; ztV8FKLIVq~^9Y+i&AycI%`VyDoOo;Ya#U7wCd@j{wyiHyUd9U>j}pk!Wr;h-8Tt8z zFl$s-R-D0TBpXlv&Wpf=xi;79q49PGV?QV~1|tx?O+2>IA5m(Z^CWkj_=E~YpwE& zij24d%lge>p;_B01!T!tNmhKsL9gU_G7q5?u$ClxN^7c(+)A7L!$z-Fe_!6j!(NdC zF){C|+d@jwDf-D>ffOSrfZgDC>2GA~GRaLv*?=Bk7?LSNw7g4>+&tMaCWIisomCC$ zopzGjxDCq0$nT!7@-B0S=4B|aeR^vUm!=yGd3nM3XURRl8qWqEKabDrXn&`x`0yAs zp||f>C{ie}nygU3)@1MH{^Sv9tYCMUM!fF;C3xm~Qd=5lj9R*(&t&sLyAwTiNov$& z+c>+8itH*HkvjsLFw&*XtG1G`bF`(KX|V5wRjgl^@S&!K+sZk)YZfPNcVl^9ztO`) z`qiTQmV@vwkjk}^q?;F)0>sKIJNPEcNCgrN-iLvt3c-K!(!B@CmDHwZ!%x*S$X!wY zPhT<6gu7nT^$jd4YJXm6av8s1*^_SB#_N_)K8#i90xz~x;r6bH;j~bmd0$*l?vV># zj)SNRjV0<#m3hqEUq+zHi5V*we?|3Hj&m%$!~?NWKW=F#)qn`tl;F$nbw;7t@%tC~ 
zZni{@2;xGd8}{B#odDBZ-GRv(&Q4|=vxeSV0zJ@Z+`+7%#|E0Hd*7O6*tH;jmAHx& zfYS|}73>YfIzOUxvsw7!a{U|kYv4sL$$cFK4AFTQS@BLSd4}|j4n9QSNwy|FEqrwL zQE}mn1aFO7kfd22Ht^);hAktc%nxw+-S7ORt9h6o>K z7A*^V<}ffsg~W+#*;b`l3}#1cdaX}j+A8)|d^jm6ta@=KXNgW9yha4F`as7*QacWC zJ_okx2x&edsO{?6D7~=z>CZx@%VQ!}5h>74k-}KHAfig)?ubJjV@au4n~F*9Zc~dK zDeO01>-2ReO00nwDfzDnWLd(yJXaIlSF&I{{b=>m+MR29{g0XOGqWPqNf$AKFUeJ$ zQJ-GpJXd?4aKGK!3vJW7-62Ow7AsZjD}fRwR!%O8MQ(DFeT!rk2tXFPtJ@6c$GZX6 zJV-)QIa_iDq;n3CU>YnYtIMhYd2)Xpo__T%S(U1-cxnn4|5(Au4y{)XBx+omw+p<> zY38vc=0`$#-X=$#^p}9C`<9oYGy=+`U7f^Lyf`L-LfkldfhTp4Z)8>L8nN6qmOC+T zLQ)Vu@i~np7I9mxj&51Z@4#C{HwiiThtPmr3Ix!q@?jEpdcWff=aivsj-e^H&e{;-tNEZucIG)!AO&$E%)c^6EBf(xc4-`&lb+;a4zd~dmGrIfTIsb?y+~SbPx{c&_=Sk$BfJl}n+Ih_racbU8KHtS~Z;J9O zZR;NGo+o|OO~;2^j8g+T{@jv*1hzI0TS8fVUtKiGD%F=i&mDNEIBF;0kk}<+M0z#5 zYvrkTWI!*)Ra5%w_|A(wrcMtsle@ZH`-?}bn?ooH8D^5S`Q{S9>4?Lo68=6$G^f3? z-Ka{d?7>CFTR3@~BLqLlI4BA_7#23vHhu=jJe+bXeg?Yn3~_GKNo2POFL=(MA-Ml@vymvh=eRjfoamTP2J|}G!li8_ z#)0*>xF$dlHkvO21{)>eI9z2r5L{-dU_@?NM;N%)RfS~HfeeY4)DrP4mdDejm+q@A zEyL(-@6Tt-hCyI%{AUZyH$>j#6cW-*H3||C zYXJPwAVmm+L?6XexiXPcIJF3~72omJ`2*NQu8d4>FuTff?DYAUMG&dT)l0^^{_Ook znw1hB)UGSt^4&OJCKr({+{N~bo|{BM2<`8xGh8|$6qj|Tj_6!317ZdC5VrDt*{5p( zU~Uy+V7$noQT9q03BXAa{2oLvd;XmA8~3dO@Q&kXb&Qz$HOtmd)TLy(UFolFFnujjpDx(4p-rw+>WMSBYcWRj2E0mpu3MLl>O z@dhJo`OYFdXcepCOc3Xt{bqaWq029cZ?Fbzn25|B`de8}S^sqp@rIZGO)U`M@k`7@ zgv3U~331zGsN!tM?t<@Z@zm+;lJT?oFt2$mr(p@4HO(wvE;GM*02S;)rP(zmr9gtg2QFV8fmuhN$jJ-Vbn05BBk$(xRaiAJuRSIE1O$v$vx z#Dh`t@%Im-Vl%PY)viy&S2NZD{d&R19{Lusl{`^_8a!Jr+&bg6(vX3B1L1hb98EHE z-B-#=RInpW5^u*+6BLX7dNRAMGZ9Q52k8PRrXI@uLI(c`cCJ4ZjXy&i{4)V6qAYy& zrYWSQX%7eHfmrY+V#7igU0*EfQ?lra6Ejy2emHlsm&%u@3XG3w=_WTSsifBuzTgsL zS3=I-!{8^=u%wL|{Ha!^ZB@W7s}!??hwU4x{9K(E>lSfh#t-H)LP8!Pe-$$3tdh9? 
z@i^6_ieV_&_={4H&XT!QJz8TXn9NobL)`je)Tw3de>*JE&-J@@?^8_~Tya*mA33%f_pR3Is&t=SEboDkJQ08{7nQSnx!$&fHBp!buG7Y^wlX_^SBi^vWX6u8P55e1_U`@-|blUZGOb&u(U{l^w(pI89T5m=PCc8nc7u?q=j_S#x>NKRy}JN`O;5%{Nl#d+f^yrSe&6q5b= z(>hy7dCkdHRaT0@Qgb^;=9t0?jv%s0hQ+st_&=#&|bAIhm zK>U;zua`WVb~!iD z{gpXb188ZM4?%7o;Ws(NRw29$jbIojqLBD){By})r}h`XK(BY|Jh@?%^V)-s$3|S! zr87|`ui(DWw8HHgvvwjmk`?_VJN4x2J9Xc)ei!SsK#OmO0+y6){1DGcwo+gyw^8@?oFmx3NlL0ZnfujEGHcibA^6E{ zA*f2e|Nb?p&dqqaZeRk;N6UF({Hyum%YEV&0>}UAu-cQy$S=ta{zyk77JpZ(Unx8> zC5WhUdpFkp%;f>dJ4Bf8ndutg@prT|Q!Gc&B@FjsgE5f{9PD~eZc?A8^NkpS66gZ? zA&>VEuj!q6XtW0yRA*GUyf!oP+t)N67q#?Q$1{6u!pJ}}__q-Sm%KI~h8Wudw867x zsk7`kP=-k(@v5zQq}&4P5)Qx7}+-;;Ev+;5MAF;nd5OjvOqEMmR zpocUE3f3vxn2)uG>HxgNw=M{61Q-aM6=!@g0!gs;NUXwgQXOvi!{^}NGym3?|LK-S zign8H_vVwS>r_+ZOw^7LAruDFc{$^K=n>WR9d|{Bb^Q1mwi!?Ql74M_y%86D&NWDo zNvRen$zUy^OSxD3bax^}o_;>06yax00`uXw< zyGZW9JxBHKY>M9@MxvXDa?;g$OSJ*(tR%#|)|)?C^J31riD34IzOeB7@O6#f6w%=* zx=!8p8th82A>~_E3ijRwZNM$Sj57)xq1EI70`kQZ$|n@@Rsjjh&Q;fhlac2UjbT*)Wsv$DA0gTF4kCeG#P17H{P%FL~mr{8_s9QRlmG8zGIR-9;Ah2<0x_VhDJ=dFsBY z06r!lDqJq7Asf~d?=rq}^=2tszd_V1MOQJ#f>-A5duEMR*tgF7bB8!1qth0+W#7@s z`lZjV=m}lDqKI@`1>v^8mrfnBbao~?3%iLs?{oQfD#hHT{ffj7b)HWDmO{6q@KWkd zk)~IX$YM^dk9gJ&G0ti*?@&6To1N{l^sU%nhWz8q-4)a^+FJW?kp1hFqoQ&uF_MaU z=|=raEOX$dc#*CZz4l5}b~EBT5|3an0o#m8qUPq=&Rycl>_lc#wk_GEZPmRKTYM4TDHOYL= z4}nFctaL^pm4xUU;+z?fPQCoP29X#Oacwb?GKMx6rk+^?o}Mq;7>QbOOXU*eZO(bb zC-@r6buZZk5->QS%v!sL=4offOhQ2LLQ;d=H|5gfLn{xJ5pQ`KBiENPaYfu;%rwQ~ zSGm4qS%o1N~oYy#C@J?_jDg= z{FaZnVc`zcbpMOlz8m_TN`y>AcyX(oEpst$% zDM+&vl_6L7%fp%e?FbAJzPS#hPRCeKIIYtRp1=FGD1M!S?B?c3b{4&WV@K|Borfuh zH8)xSQwmik5(zc2ujUC0FTJLr13C%6%C4y)zdZ~3AW%896%L@C+7X7Vgx%?6SFVwo z)reC47VM>&D@WjM?eucQILD8j=Am90(Clwnnt*p6uWu6ijL;#T&8xwCWssA|na(s*obH$HAm_~0LGPG${M zNkU`MsU6W9*?W;XGeHl`NcR>X43Qiy%rxP;*fyXlyl~6*VYFrH1oRr3N%OzpD#Q+< z!}a%jQM(s6jY&L38Bgaa)Y%aw=tvNsI+e3XyPG;6ST(qDni%yY`KVLo*jT~so-w^q3YAU~6q)HaK^SBSHL^5Egek^n2McLG zaV~cx>7-Uz*2wZ?qZ+n!O(n<#?1*edrAE_Dt6b?BVj`9kV$33@0D#&M;u(4QV%(FS 
zW~p7~(X^)HT%w;e-_%f+pTt6KCUKQZ+43?7 ztGyd~v|>+U_`QB26xHo_f<=pjy-g*#C0;9j9@gwRK1m<({8jv4wtF)bW7~FGr)(Kl z-iq3ddSXue#NKxi1KF5$^dc?0hp65BITyHKKdcs>Y`a8}ZZaOz*@KN9*;M1J)q@dj ze#7nH(3f0NJk%LWDgk17`s&bn+7H;&1$bu6p6m}sONS7ou4wH`YA5{_$718W*O)xt zl62?CQcxRVxu%O*c9H3*xP&=|$TzOAw3d0P7!+xKZ5GQ#kAk+!8U0gR``rgrx z{apmfHksi{!^muqrddFwjg6`BZ0BKa&=DVxZ-CBvzK5}hUJTi@r{3bCLz?@sgZ)7i zj@s>E9JDzO=+B=K?2!UJe_gRW)l~$FtJ{+J<5V*F)S|lF$-^M0-(3lFZjpa5tti9SpVN{gOLZ}Wa^CW~{iiEUc;*4|J5 zT)=ZT4(`PhKDpI99&n71TOIBIii>$|2qQ<J^6XNYK^0BQ1_19_C3 z-%<<>{r-iQxwKP0RNfPXU6ZyS>Bxhtt#FweWAT|h27M1_YDYY*FoRuO zu|*HE6I_gx_mJZN7*M9d-V}PX(>P!2LZGe3t7Ik{u(yt8HpsBLX=3$I`qn*0trJrN zrVvok!!l#v}GFqKyzJ9cyE?ghboc>Nj6FuDKPG99*`Rh(Q;%TlE@kH)HnOl z8#&vNRK&s4NuJiGc%u@vNvVkXOfn@5eWZ(+-=EqRvUPA37JK-Y zx#R6>Nt`%q5m{kY=Aje!qCQ`LkHO2zxdj>b2y2}ioi&KBc>M=VP$Z^VcxczA0Y%N_ z3QBO4dnJU|nTQnQ@AEGaa7>+wOsbTYQmP;nB|O z#K~?>k)Ov(lZc1@EetLDkYjyV5KbKO5g+KlQ~o0XU_~ zHWi40EdRyoc*B0wGN>AlHP;_%Dm1g)x~{tu-bnCw62UF! zo|ljpp%@tbp5coi&m%(lGoI)4&Jy3!qh$NcExTVWys$4)?Brkhm9l+wX0dW>-=VU7 zAtJwDy~8nTJHLH&+x2U{@P!jt#6(SNtw}7;Z;nURKU~OrKQ+fTG~8Rng4^5cdRL%6 zqcvH! 
zSg(H^*Oy6Xy87UYRb#U0iLZh`(5&va-=|5!F#=MU@jyn(7XK#?ilq`*J<+6rc8O$D zOBN_6ewbd@6t7$6CyuNw+gA&SdA)mVhe;e^3KwS6=;c90h{z8Ys6RUxZ)j8v6^};g zV2ZljW0IqXt0o#f=gRxCN{PyVq%(c)HgD{hbpEykxN7A>%=} z^|`Syp*118T@qk4`-ygIL+0^xy{(eOdD&=c)~DD}4i9hA#-uV6j7n13|WL!J95XWCCYOm+PE%CYL-#T_CGT9=}pn=`<9tY za6Cegicp$}2lT3DTxm0l^+EMx(}u$vhD9r&s@BXe|2mRQm33H#e|)UHxV?3GlhvHM zaSsO+bxu+KLUb$z3geaAfTp+g%LjZYqqx(EERet4gbAAT|Bkx-WLMFq7z@ivmzVe5 z_u#hs8G8PvMZsgY_Wk8qHG)}0cr?APB#3_f*7 z)c}^nYm70SQ|B=$3l&OzG&ud;<;oFKJk(l9USvK7Jf;wa7nk0>4>;_`YMrs6II}~> zsG=PW*79r0`|w6;I>96&o;e~$-%JvhK>-MAAq&Bv)$-zt6=d`=4piaMXeyc|qHsBM zCo@mc<0Uf2oSQ8=nBZ?Fd6Q8eeiq~F?^npWHrQaG8cLb8WQSkfKz0}bcW^0avKNl2vQF=c7o3Uuy7vRthk zjpVy36-5wMB)jLOHlU-HhYCsnLh(klTLg|_H;lA_a0F8TN=yThurdx-eLl;au;WGE zr!%reQw&k$bdMp~j?rpg0yWM&NLt<4x;LxRN<+TqwR$!E;*r`-hH6)u4-N&kvwNN< zjQSc9L$uLm&i-3~;>;Vtj47oSV4V@ZRDmtd(z(*_%3gd7Y#NiZ7hAV(z0-^Q8}co6 z`;VNr?AFf2>UWGSy5_Rg0;v?fLdhu8dfr00-SuaIi;1myfQ7;LUF`J6!opPEW*WQ~ z6OM&h91LzdCx!9p5lp#^H)YFYdyl(*!wV?_xbQ&fPht+AF$IjU(UwSq!iV_@f)GSJD)Bnmj^jY{Hz{7%jwTDqFIxsCnPKy{(t`aKmWk(&Sioo z$_-}G<1;^BFXT`D>qF-Re9iyAez<4j#Xgt+&%5yZEA5GX1Nq)VAgZ61!E^3Ls0i2n zp)r!8z0SKAq(NA%@*TpUYR_>d0M~lc35*hT@|3L}#1PAydu$Ywx90O#1PGBT1Qq}y$b$%< zNK^Uu{kp>K2KIrBIcmFfU&oCui5^H6{5t=1>sLi*q5~!|_e3AXzi+ZnQ4Nw5+ozyq z+o$w6Q+w*4UnshQhpW4Nbj9e}zaNRC`V;Vltr+i!0<8~!irDfbOB3WERFPchzvaJz*2>{c*M zB?Eg&SGhZlrZt~|St^=D^@&>nU}8xh^}~(oHdB8swONZ3Zi`2hBALG~^q;#bi(BWj z2>+Pvc;54r5%xdV@rdz6su&Y@1pnRty-fdEb#C)#;6yTuhnw&I{gwXQ;*g7gBKAI? zKlkr@^3PAge@UZ<+gr)eNBi&F`+Mc!DoVn3Dqow*`RBL&`$I|;xM?;U6#f7Fwts(s zFYUG{iJ#p5=ePa)LyRM`rpOe8X#TaP{(OJYL~KZs`0oY(y4(MK!vA}b{C7$Jc~g)N z|L@-68m_*D_mqUxohw)fHO&uz`CcX2pzNp_t&SNZn-Rwo@a%{N@c8fVlcc9|mL`~u^zVit23$_T|PMf?+3BZOg4 zU%R2=jzu>2{VLZ4SM*LCXI<5Mo=p4CpJJkgZ=vTdFYzD0#Up8`S;l_pTE8}|KbQOS zL(DDsV$_@KTI$xHf*nce2%`l{?=rh>9`CpIj_qXUHf+9U#GEe*; zi!^-cfX5$WdkOz@kyd~r{eO4We>PnB+9^Izr2l)tzn{r}pYZP`_y5I`eyH#U@_6YHLi_vo{^b^U77z}_&edPC>Jro*);u>m>hZ4SXz=B@}_`txQODuIngxqk6? 
zrMvRP4JM-8RBAf{AuveI0e5xvN#@bQ9?1R%t>P_8r$R&hE&u2BbyJ2HUV}Jab0%`CASu5#lu``Q+ufVv@lJQ9f*|BgP8iJ8G6(R}{N22^ax zEo|j1LQt9KfWebO=+V8tY}dYZv;LX}EFcO{gTVLpz$Po+dX?s6{`BolSsR^1(|GQgm)*uv+qa#c}}n@@3Y=wYL)pLx$CbP9)$`|!f87c z_b$57iN17ges1+U3*gU>zy~wvsf+8Usee}7o44Fx#PQ3(lK7w1^Yc5NfZm!i=F?ZD zpZANxEe|tT*(1sNfA)vUYj9m;6EVd9bE$u>89qFshU)&;gx-HF?gX>=uqBONIi2}u z-Tto+Zdh(cXSKF(A&*c|^&8059s-hY_H)pK)I`2HW-Wy9>H++?2Sh}X)4)I%Kz>oO z7>Qa!GS|<&8wmMiULD!CtfTT)5<~P=|**GJ((=F8!RKe|d6(G}{qdMM^Sf=dn z00V0X3f#qPkhG<_yLoAB1}ypoAZ$!dO`1R6pAPIf0ELa@F#sg%**im?3l@+z@NmU> zMxaMvbk(l_L}b$_k;uy`I_zi8jc<-g9S#0`I>-&WSpsj#Yk@6CnI|zglr9@xHpol` z@yEB2qYb1rT6u;{9dc}5kIsQ(T48cYPFrX$~ zaGAa9kYl>DrHneB5jp&_vY|sDyH|5eieWy?4%o^sx)TsY8ip#Qwj`M2Skm|b5h!pBKw#QdF%$AG1j61 z8W*H_fxLqWqW#?)i3r+K?7uJrVFA{TkbjDP_7NE>A)+x7c{L9P)r?aU!}+s9A`xBE zMzY8u<1rfS_l;zf{3A9!iEs)4gUci>l~IWWb{3rP^u`95qXcnVBum{0!sDf*4;Kx3 z;I$gfF&9WUaAC+whPO`SSm4}52_H9{$T!2=4Fx#rz#U2M=Skx2b==*vnC1uG@k2K} z&PJl>la?p-oA}Iy>+xGk=7<3Gy2`6NzMf2W1B9Z01#F;QNm0$ndXuf4MAy>6b;otg+~=HA&t4 z{h_hVEc-rWGZ+L0+NAn$$ztK_?rW-p7#%Yd;FSwev<;gk+WYE_?LBQ?M?XA7dh@7l zo{Fd+mSdoFnII|`33LQb;*ha6i72aSBft~NE#@x^HnmoZiYqfiaTd-lWR1JSUY7Ix zoH?ux#xelh*!x}Af}(rj5bb?+JL{YuCExp%NmMiRc+VKGM)9a8Qe_>}ct1Zv=mzaZ zOj~JEWInx>F`Q`UlsRfJ3|o&{aTLyF&77303Kd5o+0Kc}tF?_~i`&G?s@>7+T#?sQYRi%cQcw-;EjNEVBU}&&=oE z;M=vok~e~_^}&kM*nT%W>~&{%&DVDa2M@AQhs;Zb{aSONCo*2xaXc*+sn$vDvYDFO&VsU3_s-VVP9BXMI#od5MW2}VQ$UE8lQI%EqNFz8qAh z@X38Cx^?Bk%brWrH}|jtnPM*8Si|}df_us)?cSrr5K%k9v}%CD5*Fv)%5Z;Y-MmL| z6CD|OS|IQ0m<2}o{pE4GPmp>ef5nvY!HA`)+DpD}QZ9whJ141jM4R>PdGq%*w!Y}^ zC$hV)dRsVD_FbkqZCh{yjWawS`*xqox|L%NLLf{T4lP1`Px1w^}veY!?0lEuY&xm~4n z*$W7>u})~u`A*2D3Kv)6{ECI^I{GfV?~YUG`x|D^pe25qCX7x`R;^@x>2GZ zcCN?X<^0W%;2B*%^0{2LJM>bmf=57Qp<(mM_p8v}nAj94H4yxWC@Jk&-U%t%3no5? 
zYLa~N{Ou>RiA7b7ZAGf&wh4OSMoC{TtFWdgUy81dOo`9%E&9{I6|g-Kw_DKngCV*D zXAAO!HlQMM)xYvz3VqSKw;p}$Jog=h*{W4FPgw5pzQ5=#*)Xo$7W{(T88~(m{eD4; zdZYnnm%M!FSwBb9M8NqKB$HbQ%=+8hJx9o%$k>t|E(|q>*LFEBvTD`WV$#3%vKf_c zKl*WSeq=B1h%u^FlG4c2Rf1rDyZYFrN2v{V$KB7*El~SI>#P1ByEx*7T8_h$-*~Eh zX$uaL;b*J_ulpQAyYWd&YL-($y<8U)K=Y|ox**551Airu2wYC(=yZ|x!X(RX5n0#C z(?x4s4+PqLO?XIN`w;!?0)|p)HWxNUdNDg>dv)}}U8^Q9QjT^gdGX<;5)u}EO)vYY zS28kCoY6d@!AVxllX?T9P$Kilzw4C~n|trk;)MKZ7dkbdU8v-yRKNSOh5@&3aT*No z>=z2eZVF(%=fEq;aE1~h%h|@IKsoDsH3l)3)JkBAL24^l*a)`X52rnJw&or^x{XyW zHWd_opc`e+|Ef8Vhem&ZTOm*E%i3`XZ~lt1hFQoqoRSjCJR+p8=^@>^W={0Yx1t-E zKYhbuh4JaXiwIPEJlOJEZy7;5(P4d0;)gM0Mm78UMvN2sT-e6-X<%tF4{jnHW&X5S za*d<=VtyR@%(D(a-FNZ5OG+-hs^jDwA(4M|VQerhVF1(F2tDtXX`>gUIJqG7#0b(mXxc+-X1g~ub`%PTO~nIL zPc0b*8-#AH{`hdHMNM$$7={Y23y<`@SEnalKrx)jFU33maeS&N^6=9I*PF;e;=4|p zL}cil#OUnfopN9FyH(!)hn`p42xon_7ht5|j&3`G_i&1D#N$XmJT*DeWqh4gm`o@0 zEm#)>WiMQslk#eYYe1h425xTaKuFkpQ6G=`@{(}P2Z_jY|s9gMEuX_9zPNKr(+Nez4f%)cm;#v-%DE}9a5#nW058uGvz30 zIBoO9-}F<(gPGRRq_H*YB2l%|?Q19{O<=7oQc<%Jn9q8JEZ@GamFB&ah0t|<<_!Ck zBbFTHmRH`a{n0p{#%jW|?wb;1dFYd@{l_1w4-!LJod?#s+K=$z2w4Y@ZkW$iB}-5;sa`&6xNr;x@2 zA@LQ7ey`8-rz~?Qx#UJxdpsAI47h7G#)7Nyfv)`NqZa*dJcqmyQ`fUKWHH`{&W8rF zZM~I8?pb58E32a)S-Uk_ti1cjU2?U)kl<$NdEfh*$92=9G|FS1aW85rJxfKewm&bK z*I2ElP7?t!yE5Wm-BHN zIm_}*1#=j^4*F+2^Ew-Gn+-y6Fd_+C%~eELF0=&sh7)pI3>;3#fq2X4A`?R$H&tx5 zwN&vh;3n38Zd+bA{)z*gol2UuG%!*kMBz{#4}sIzEK}wB_8+ zaZe24mRm|w0%ppnKg!FrI{CNedhV>gU@4*OjeAkscbHbANLI|+g2OBiOFLZHs!5(z zpcy$@goPI-=E9mVn##|rcZ%r+EH&dNDakYH7 zS=>j9^F{59jwPOxdiVv_thjEM7XT(OZP0;*dD}m7g}cM(XLl|tf)_#LKVs)PuOL=? zRMsbez_9JAHt{!fIA{g*?=+guE>cu3tAt9%gyIN_508Xl$T=tGBk7%5%eNrRuF%;p ztW8sKrqdx$GnTbqW98-ZW~?LIArYG_L#cvP%tg+>Jp5SqsvrcZH&3D#otd}n`jjl6 z+q<1(!!e&TZy`!|z}vFzT%W*!lbyoTfl-}x6WXAXEz~-C%qGKGhI|%Q}-g59*1*sSZO5A1Zfm)d(X1Z;%x&#Ou)!rOoFm@3hAM&83Gj! 
ztcR5Ox%mZ8-p=gQv%7Z5yU>ncd=>-GZf&i?-PziKoUdz2I6oJ*sJ%l0#z1GLc#_az6A90Q}b#L8D5aEI`3f%Z%WF_sxc zSFJ~`B(PM9&k1Lm4ui;zlr4^gGq#cD_(iLgzu5Ztmv=jAa~Yyx4X=FkWUIOObuwfL zILBH@x_#)ckC&M#?(Ob%m={cXQ-*F@g8g0+@+lm>=r0a3s->k6$ue^b^i)M!*Kw%L zCl^2yPegeHXWLK7N)Rn~*14;)Uz4&_wC{LvD`}*VVqSSx>^E@XmIa`Ry{~dT+#9qg zk2R~dVMfXb;WuMDj!Z|}y1x|&?`Nv$M_4u#$*1V@Nh^d}murRWnyk)RC6Gf0!>>eB z(#7EWAmOP|!3(VddP;47iG6QLmLIkN0L;wMd2Ja@ZQ>?~ed8gytsq}(>9cWVb-*cE zie95SCS$zhb)OOfyq~yoV2|+ zXad&~Gq@b77N!*D}H`4@9IQgwLO7kYXm#mm*PCweqN|B&=>}@=BTLK%TexjzaAXE z4T}EcMY$gZgU0EFYOiZa&r}>AuE-)nulSMh(JBxYC9>$re?Nl5!kmTo%MCqTtB+ca z>?T&2*Y4QQr@kur62$sh{T?Q;e_|bJCz&&t%|SO~D3EB%W9J+SnXk%AiVIi%JaZ0$ zD?Z1W{wFdNE>d?!W98=Pw)(L#ao}Y|US#C*(r*;2c@AxcMe?8xfCj{rmpSLgyVlG@ z#OF@r31SkH{cw((Njj0sMQi0`ZrrowX&4U^o4e&sLYirC(i;&J>L28>3ec>GYiKe z$GfsjPAFd+d8iZ*<@$#lh2+cX+TNLms{X|l^_h_g&i2;z3TDa-*4Ni5mXCJ%16njF zi&8t89!5s~Z$i2VEm>jVdsy5a0kW+dP zV1@9^xa@6!mWCvUsE$V106u;^#|RT&UiT91$Q4fI3H?q`TnKchk!#Zs@UkjDw(WZ_ zG&r=eRa`9;@;JGAe`Mus|ApC^1rhmMA&I3-Xv}cb#xX@9h(7!#f4Db_kFMvT%P?^P ztR3h4xhcG?bh6?2+C<(SQl5eHf3V4THtGPO`AM1kFGaKQ>QD*ZYkCbV<4qwW#>fG@ zXD@iF87>9iSCA*cJ%WZbnthEJ&&77aBuJWv(aa7GXpbku65~vgk{$fqp!#GH3^q=b zO+JF1gadwPe6X&H;}veKWQl!>oR!1gXt7eYPwThjOqjJ`ZSPvTiplLXj3>JlIln;N z-_!Q7LT}>o${jHLr$~);^dAGtpiwNlI{uKet+y~4Z`>%v?y?_?DctteT>&VJ=bbjI zt0}={%^3CEIH}Hb(ZXdx<83*zJFpMypZwEXS^%4{JzZi5WSb5{&g6Tk(DrWr-N_&+ zgIauxJp}eU{m`SDvRXoB)b3=mFV?5ccHZ!8Vw?c?!A^$zn5>pZhi%UO1=xX9n}CLB z4T8T%>Lf#3Z@=t*FwR;ioIyq8U|Iecf!zw4XuOG{l4cp*Kc7?|eiYgeQ=hh|Mbp`K zb;Vcy1o6n?!puVT;tI_uFT^bmzV9+bDNMfi$83(UtX5gWX}&~JG$bWHXur1LJj}o< zrX03#9tB^QKk%{Rec=8)PTf@ixE|o={M?rIo>}Zl$|gVZ5xqo~ z015|hAneuj7c`ru{3p*Ud*+5r{=Tq_i6)DXJSB#T%7k6(* zKT7Vo@VjR7bHoHi8&s4KVR>5CJs?$;OW*22@x;NZEIlb$@l#=%XgL|1DODi zx;Q9P&t?>URcBl?APIr^hFHTMqHKwIuluWV*({Kk$4D||T#Pt#mniv}8oAX?P2nWO zw@VC81rDHACxZ0O#H4PMV} z(OmuXnx=J|lHo`yJ`7<}dj~Tj+NzI->x4j4dA7LGJ>3(%yzueO&&upH@#y&Qmjn@QqQi&BE%c-4N%lWuSyDY?t8& 
ziNy!x{;8KkkR)KkpnietkEV|S^Ru62@l9tGP*JPNvTWM|{v@QW!=Z=ef)NQDYMAbS*2b(eCR04+U9cvAsJD7gBC2iZpcX0&X&yngMVze!LCVl?mZps znmky6_S{F89&b?1sXm`a;Mk`*n;_ViBGgS)c~+!$AmMU7k;TKX7lGrFz}lsAJ$ZIM zSI;dWY{VXbsst#XMd@Z&TAID}FZn5>n-3-e53anHIi2jj&3djm%JkiafaEdWuujdPGE1`8k{FcVm9Bjuv#Ukt_SNy2H6~#nhjhbX)O^V zS9uFS`DCDEl1Gz5wV3hr?};IVjnt;^RyzA|>U#8Q=O^U5F!GyjPU{I+AHpgh#@2VH+koaJ= zFspc?G*4g4o;fgs8b9l3U%Z6~{J_0kl9LB8v%n#64Ux~BFs6%Po)nyU#D6yQBVvN4 z<+0ozkjaVPp(cs+QhO>k)hKdgIU=3g*$bSKn*ET3@LHfE4_VIg9a}A*q7WP|N1=d2 zm`r%Z*p}iD4cuk3d31k^e7uh>wNpK@3XAOl=)yJ1Dld_d`68m)QO_R5Jiq%EM0d*l zT^4m1Ns@iHgAvy0LT|lEZYG1shT-eFa4a#|WS;_7M7j8s7>^}K^r=_8Yt_-%Zo56n z)-I5E(w47N$P!!2$Y#9x0f5LW;NsYKoaOh&4Cu{OwKgn&Wk~(-t1R6_1ZVLEOAnzr zO|1dBE82@h*+;E5Fx`6XBI8vr5(ghe{OJAR8M-l-dm`1)G_eZ`Q_i54OJ~j&`)|7A z)@4KL!`rrZA49>^r}KTwb>I<63VVyM0I3TQIh*pBSN6!HQAXnI1EOMw?g#Y5b`a%Z zAPr^;$M0KrMt=ala3eFXnaKz+dnq8?o*^J##Eh-$Vs4{+Ym;yPv!Q*siOxf+8uf*g zX7e_sKwx!4YbdAsGb*?w6J|wmz9@hG`IOJD#pTyX91Z}J99sTvMs;~5R{Ln}1D?OX zB0VxARKY5fMYYVVcDj+7709WB%G1Mj^A9T;BMaR1zRfo-^j~gWm)@zl2rQTAj9e1$ z+=Jl4uxA>rx~$E#du|t;sVkwYhl zY*0#7F7QnT5an|o&!CRz5jtTGn%65vg+YGoc)Ay=Pu!Y|k=wf_Ka8Z_yftu|jrEYn z&M#M@Vc3H4f>^m44hBrP&l>ZPk-%*p0R8w>v>^0S)~@-pU=EAQd(W}Mx^}hE^t(*R zixuDMkv#dWqi)!rd{7utyy~OPY=z;)SzXVLQXs&pgOqq4s|*J-N^uh9=g@Yt@!Ity z*!|Y8Ze&8`89N{84nTZ37Q&$6$wKd&oP@lI-CKETCa1}t*CZVBuikCF#jFvqg6L%9 zuhuyNZ>TZ<70^JX+e1Q3UcHwHHJ8QtAO@fZD(eTD8j=&$VI#Yqvd%QNRy=UXL3>zI zc-oP~67ua37TzY_M7qBeD60?4Wd7q40s@Z_!pNDhIL{Fz{QG%rCreqoi4hwB>tkL^ zUorl8nNxv7pZvLT?4WyoU_?Ebs2fJTnUEf#1FAh~E^u})3ms3OPX4k7vxdh`7cKboSvh?=w0%#*z zPaf%^vTOb48deCy*qyTXMpj%^UJRDR{?Fv6co+IF(utJI&`7)zRWo0Rtud1`Uo_N8 zHqg0X%Xbt+d7>w74S1IX0M@O&)F#CCdG;Lz8z{WLWJ2%Fh_%s3>168lQX&|4TH^U0 zNH|Q=%EF9zVVfsjCd(hu5g$S5oX;-dBLsYH6G?TCWcnN%#bTZ(xD|a_cx)K^mhRZr zE1EWW;eIp1<`#e)WAf7)V4ZZ)(v+Od@%|iizLG5B4nWTYFWb#T+9x&D9|18HyxiZ~ zfSYBO^XU|0cW7|PeNTnp4^@jLX%_pyLiE~Qo^V%S4+g55?t+gns9{)n>5tfGUsBv@ z%d75ECC`zI;guPMmv4C1b9*fBBCnFpwN7!Ag_!NZW;UwLI@e!GGVsd~akQncxz@v~ z~vDK=Y20L80@*U9%*qjKvSvDlT_pDWZ 
zALeIu^)^6ZKOimkMvL}Qo=vql8SS<`O9kBe;06YJ) zTSH>fr#U+&^hH}QnitkR>}+zQIDrwr^KrXsyE1W3U0ui?^U4}hQzWB=6GQlEi;=9y zuU0I3c2L3l+7=EG!Z_8Kwz377`^RM#*agqN*95lu_;b9&Ig4PCp;63OvX&EnJEV&y zR2vo7;U{S^u3}47P}M(+2E)IjLfalPEg>@-5$|&Is{uL8@jmL9U2M-+UlMv)?ft=i zQag^(cpdzgCApi;%-LMSB6jcSm8yWzoLs?M#TLsBzJS`P-L>tJbfOV@|Azp1bj@v1 z8i%t5c8mzzU_JfXKuFAN)frq3?|Dz1)7(&$eUbs>(lvi*mXpqMSpJ!bN zojTH|EE9G!wg`q4uD&AE*c^2d0+TW!jA_-NJwQzABuk{arFb6G<5} z_@(gW1Yj~T!L)vMx5eM2tupXtB>Bp>>waZ|rA}?Sa^Gza#I71`fqCL+>%fBX z$TeS4B4~bj4&4YJqy4SxH;$pA0RegHBO2V`1iyA)&*D zP2Lz`MH#a`=}SQhYT&32EKjqofTAt$k69cGJ4p7;{0{7+EUZ;K;|k7gz%My?1%z#q zIoRsNRwEHyQ#gaqPBg3P-Wu#N^(4>WSO%U@J6!0xnBUS1Q|Bm$=?B1jTOZ*bLc1G6 z^HwbXd{8tC$tH_NhV`0@#x9>e=NRaL6FuG>B5)e2a)M&zes0bfc)`%@Pe>dFG+2T2 zuy+K9d${n==^tr5(Cr?TFpf-d4%6Cx0-2x4+@2r#q*?%-7^x4-lhEzhkK%RX&X#TW zD%(O;Ct0J!afcSiINKcq$*WuloaeP(OamKG|A{0eR0e^ z98_c@;2Yuv2EaM_#hNkb-`GWJl>cd+W7OP&#zkB~VSy`&IujN+qCL@PY~th~ZnrU& zQ+poG)kEi0Y__wG?8(Y!21iBqI}})3Sch|@+X~7@T=M?hP&;KZtN zKf904-|(1)%5mt>SM4~GN$qT6=uqI)ZS&o*YYX~S*$^FFxJcLhu57@f|M-!?3M?7* z(74p}DvncX{PCgffviPpP7p=n5st}SS?zroZ!&(DYVT2Fd%v$&Ytyp3@k+0exZR4+ z+fbx$0L7q8Fs;D;ha9L!ZpzrtuGNMe4n6e5SwMuG`gTiA!hwwfB!7`RK8vLdDSGr^ zveFla_=4b%8WO*~l`M{Y{f`^ zC**5HZNyU*ZeG7TIqY|Jhr=&kiH6>xHKkpCw7{l0%riVO%5W}BW|^F`1IDaw8D&gP z*3Q0pvCXZO7n-!0!6ipXLtkulUuwUhb?3zxqXt*e$Fuu$d(jml-fU%t&E}y~vi%Q_ z9O+Sh^z7pKRt0=k7DBx_jt0`oAc zYImA>Sh?W4LiC;@Kb%agFg(1V_|Yio5 ze}N7CG(7nl<6!darum^S0!)e}Kg2lVy~X3aOsV@n_?k9I-M8y+B9m|>9c*9DV4;@& zz{8BigN!Ir+jCzVU>E0&W_5&hmW);@XTx@@^O(dD(ZD9*JPS<1bq@`}&8d6lrK3dCSq%K(n~+{=^a?<4tOUO2cqbdxZO@V;k~^7o~_k1aR8x}nMfZRS!iuCp1; zT@Fq<-`j9E&aoEs$+*z+(dlQ5LC-q}P)p0n*v$4vz8^nWn$^eIah&>p*n97Ps{i*7 zJey=CqoKSgTU013M-gR}5!ppn_THRSDj}hCjAW)^@7-3&CR@p#W$*9xu;TT8eg1*( z_xnpXan5r*9@pcV_v?PqSO<8UvHL8YUD5*jp$U6@AG@6`-Bk^G$0Il%FrehMGSKVh5I{++mRmM%mbA_TU z6&i+u_=CSEWn$TPT{FMd=6ivsn1MY1)0rUT(`&gZ-T!#h0Q^U*5^<$xv`wlfBifw3~_?<`NH~L8-A@|!)JA)S=z(0hn#GNrgn&E)`4}Z^9`|m!n zz-n>6X!pr0=S~PJ7YR9Uq$n}bZus5lnVsxL4Py;ODleX5`c{#q=Y_q&Z&FHxZruC{ 
z_DkJiWFH=+&j#9un7_@>wp|K1*NtqS*vB#@ulc$?oWv~N;}DtlVk5&iU4b<)em2mH zFLbub>zA_s6~`duZUBy7fMTHGvN*mX0yW-`R<9`ZOL6AKS30Xlb6|h@BM+kJYCK~nWB-8(I<)jdPnCR1{a zc&1uC(dUu$$jcem+0P2MbGJ0)-o({RlV*RKFEGFHC zpr7@SAB2!6p8Ys@GUDg?2bYbi0;2b_ir7O8Hdn_w)9K}P|7rH`sQ{-!RAb z2;N6woy_0pH9g8LX>P$t``l%>B16B&O8HOcpH{iEc>t(zXo~HI9yxxHk|!`_RR*5w z2~qj8|2OXeOZ9c@s?inKaX)p>$$EPD#bg9#F;Q6A=>TY4&YNS$MEP&J9^3y~C5cs} zeSlQ^!-KA%-n3Es0^A!u{P?s}`tEA+QeEOVWtJ|!ld!G()CB3DRw{*L`~9rX7J+~3 zk=N7>8I&0h=miqFvl6;1HNB)4KdaMyh1%-lL87!+D;t8mLpQ&xF_L&}z^@;K0-8_b zizpu+ekje|&kNl&F6(f%{s7Et0%}f+!BFRU)7Oo0FPaYi&3f9)U;>6mb6CmAmBb0YS=B`LVcfB17`h&tC7xIx<)oHV)K&mR;1+~-j z#t}B>+DGMq;|!vxdF6Hs-HrSS^?!7Js-EdWSrQcMv=>7{>!2x&n@cXJ^xG^fHPAwj zQ%Be!&4#Yqfh}g3Z7Nv~{RR-v{G~83e6|1h9#D1)XV@&lz2xLq5G;t_+A%yCQwlCr z)#Q-giimW}!*pB7LnT5^E_U?jF*zWFTyecW$%34`(+QAC>IB#?vH?n$s#H}JLIJrRkqOLK^1t)-HrMFo zKhQ=d_~{bg=5I(RGcgs(r+?g{&2RifYKiccITK4j{P5%L-&rFEA z?+?26KTn>X0+AdtTjJVm24)lm+<#hzlhNOYBw*zSei4C6f%Z!CT*%E(JxC1LsACW- zKpCU@oVM5I%8@?6VhGa;o|cFVN_2S>e2O+!TxB0XPsG0uzddsIv3=s!lz%TzIDe)K z$_T5yI{DDPTn8RyYmm@qmcUo9(=VVgFm(s)Dr$m6)HY+!YByBtImxCewO*`%8pR`g z1VL*7p0SCP3yv2PHx_^}pAFWm$wWMBjir>BOoX+Z3K{ zKiHpvJPhsq!HvVYnbTVRVGA%}`ybY>QB4L*t;q4x=kJl+dU1H}hwXxzbeoxrZP&oC zL#wjr^A}qea?6wVdNANlrkGZ61&HAFyxf9%>p|f7Y$~E5}=wyP5&PlaOnPeY;!Z z>TSAOsC5WyKr6-CmGrv^QMWyID1&liw_2c_VX6MjhLF@=K_8|WD50j2ra|@p6(o!o znJjOuUUz)fzn^MV8W>N3lwkMUGC|kS{|X9x8JylGXH(v-qO49)B3F>Of{l}GeV6mV$59a>UboGCh3*Bq-2}qci|C7cIRj_$e^aHF2VLFLrnM2tB zJ~RAg=?KX7|JPobU%^XI{RUZ~nm%|r1CZ!-A0kfKB4rr;Jw%|H`R^Osajh;nzXt4& zk6=gW@CZOQ9{rU2SA*B8yZ|SOvY)D*_S6~&$=2(kT@9t-XDv8>R#=1SDNKW`>pMM@ z_jWL|18o{coiGY00Z|WiuR{tH%uPepE)RTlQC)3f_BoPTalCxjW#6gCt6OM)MPK{P zWzl1zpLKOk2Ld7en$8pFv_^Ii0g0?yf!#ie5(1Ve^TczT^;RSe-b9LzG!4CF^qSVb zhvg(n-@m9*+;wuBZ1p3J7{y5J|9|LH7WgyE-jww-M!?Q**_ONY&>_i-J_M4ta5W?| zXif4M>43!I-0kM`TjjGl9fY^skn;byPYtw1A5^gD`%jXH9oWaH0h7&c?fU<7i3bVb zpqZG7!t7DBLDhBm^lrlgKWa{Gc~xRm{r4u>vK&0qr{aA(w>={_GIGavw@Cii>BPf# zfTtKrcV2(Ze?Nw+G*~9}b7w@?E{Q$H@Q~H5!WToZ^%fV^;X_CL*4-Yg=a+}QWW1~QpYMo) 
z4ilhzxgnN$=fB0}4(M+xg5TW#xqhURk3mkY{Cyk$eXQ3XoF6vlDzoR2|1u3bupQ7Q zI{M~!HT1S@Tb=c#WCRC8eAkDR|NJBKmIjCaeDe43UcT7bE%C(GS8$bsozMAn_QrqR z7!(3{|I_@`CI4wVyfIeDs5Ls90KEcwIQ~0mfA+u65Ea~d57IsGF@`VB>(yn(Q&`P6 zLe?IQSGT_Iqz|m1#Dhmd|9LA=Okk#*#~n5+)Z4^t30h>^sbb^)@=t7CD@J`-eY}J2 zI-Ab(Rx!g!kUl7<7Zf>mqhEKaT4VG4WU{29hE>F_JNXd2mlLR)Z0KVQA(fq?zy z_v@IyEk8&tl7pGB$5RVkdjpIWsX0LYn5@bm4b${nL``^YsizGfagsNX07 zVO?Uy`_EYy#!{&6-MSN89MDSqRHAn5=KLO)}0CHvR%hx|CRt@IrJ+)d_O7fC)$HT# z+jdbc5Rgu@pX)f4=xg}oyqOnUpubP1{*@x(dztzjBwIDmzXWR6qf%M?8U&ug0rZCC zp4>JqQLO^YEG~Fp-#WjNy`27Gv=I4%T!UMWG{z1-S1H`=w{{Tw?GVWcH-g0S&RhfQ zt;kAtGspc5$%a&{p1D#jc*D*!KZ(}%1W;xpgmDH!2=kz$wtH#ojzAaxX66)$M>QFi z)DZOu@v1c_ju*t!et)`61?&SLzUV!bXe$jD2^BS2MGPG5Zl7bT92h|N1+oT zA4jT9VT$GI^#d^6Cthnc=R{_o+O8IH;Bs1i`=Yxpq|$`hfE^Qn=rh5Ub}nrbEVQ=4 zZgBk44iWIK-|F0z=%-H`{dluAG$E&QF7LYHb{4fMSdXEkjbz-5Ow0-Fu+emqz@u+l zwSKfOe9X;+Utm3{!pMZB5cSs0F{F+=!c5jPD1bFb#Y;^Pz=TubnJg;SvzrZ- zNQ;Og-T~V8(KKr>QmR2_4*lH0isG=3=q$?5w^MC>N9OANAm2sDC^gqTod-dsKoD?j zpaSJ_*_Vd@r39H^NjmnDZ_pil!ssJfPUyS;8W_E~>N#G|d^sz54V_u;hT#;I1- zWXAsvSrQp^BDvd@A~#b~AV-)IB&kDXrYEt-_5l8yq!w`W5_fp5&0ziQFL%X1(Y=6D zO(H-tC#sd!vSysnUEmgT($^6EML7XA!gnDr-3rT=s&s-K`xv?tY60NX)x3YxTP zVFE>mC`Ik=H3{(smkbewBOb*4`gj|*=_MCv=!T#MKK$6jD4UZBorwOm%@Lc&FIryz zWs;b^RI*xre6<)j8zx)$o zNSnAZ-?_s6->+B#;;mn}lo%;}Rsx+W-Go{%c46Ke@|TQ{tE>B9OGio@NN+zj`Jgm?r8F#|zMhBqL;f z92Bn$s@&q|klZ;^>`$e4`H1+D}nFmowlgxOjDd zYW-~wEUBBR-BJzm+2MUdG}fN$G7Y3&ar8-R#9VYVcXv#WGdtQNx&t|c+sX%gCReRW zJ2HIOm!z}9FHw4H{iH#_0rm2AuV4NjYe2;+{azT?I|>#6IZqT`d3>HJ4Q2yCSu2Ym zuJiCXms)y(B0nA>VPTB8r|G>Tk2%_%GKB)v7WAMs*B-F6agf315QnNQuagr{N?rg^ zhvz)%EqbT+4@Q5Ikho_o=j$__Sf{L0v>S)M)Ebn;4;I9djG-b!YAOEBy@=)|&A%i` zy`6xMTO&ydvm6n-Jc86qS_R+#CLT1cTu4+Dd^sf_aq1Z0 z!z^N5zbQtUzi#n@A?lwcIB5cFD*^KH*Ny5mk7ihlL^?(#L&x{ zoFg!(k)r7ah2;_FNd>%1B0!U0rQW7t9B4|1Qh6fuAc^UCElmwzNe;^L70+I~ugV!D z(ha!3;W-4Gcem`JD1%p&1K)WHtMcPpWE ziTHB3#j+0xNii4e@}H(p6N7FWw_9&-?ZHbR2r8~$h00jVDSu%I`Mj@y4JLEh9ml#d(KuUHJUHkM0n4yCJ?w&shvlJ6= 
z(`$096oZ=+hgf+kfiLIw=a#hx{&Xg~NuuCutuLD3Wgyubv1xb|zyUvrHDmw@eh^;@ zpuwn(WZx1?b;>gxT6V=@hHE1mwsuzpu)+Q-yNY#%Hj=ENkukvyy9fwtu$1CAjJNvw zf2}1}2y9_1#%_~G?eCw^`F#K^tJD2gL^luf_CII-qo4K}hAN!##*C2xvDze@CqWJ_7~C(^|w7(E&_2@h~EWs}iPq+@=8eXsTN!IfCt(gw}@yRD-TOUe03* zFt9Iyoe)9PLYIV=b0H*J>=n0>OZ_S7nRN?DaR%SbSP5n&DIwI?^2xjnYAHMRLtQvs z5CMBY11BQ%&77M)k1K=T4>V0P5!IYm?$6Ytqjbp6Ra;4XN_8Gu1j2z~EGrYJruxR- zPfbtk+fZQWoi!m^wUjO|hG)@PbpR2^9Kz&2AxR#Lv<4tG;$(84M~w{$IJ1}b@Mwu> z-mmz0DZ!Lum7ZT|6CTNdhX_$ApH>BoQjdz~Dy%T8ExKQqka-43iylOD(Sw~%_mFYZdeDiGx03){JVA9!e0jFp^xZ8o z&c&bSN}-VW#Ovei8;BAS6Y{|kw#QK^ovnH;c?UZgn<_v-hOaE4$=U<;XCeALW` z>V)h|y3=O~51~;plM9_I#pXHgS{1!w{n0*u^k=A`r&cU=ew8H%;4}lkETZ+k+u+j- zv<8H+^t(gEfLNzfe;m6mDpiW8zN!egt}oD8EFhFN%lDa#XG^b5o?LPhzbTKpltsW} zS0PM|v8oKo%}Yfc190e%S!C9nt2|H}>M>umq$9ca;|12)IcU=1F7f<1_BwL+i5%3D zy?6txXN8qHV%wYRbP=7Rcz5jgGQ-K~pJUmPyA9|ZJ~TmgUs>ZLVkaWWDWBf!ISy=T zPgl{#(0fhUIr+G=yZ7^1D)A5_01KRN{h8}*^}ynHoI(YMRRRw*&zVB@NC6bk4VVv@ zg%dXHnog=UZYrgS+AK$LA+k5DWIYkS8qM9OPL;%Iu~lfCd%2FpXl{2c0waLAjdUm0 zt)RDz|KRnL6*CQpK^LivnQy=XnCkENSk4TfA)f<#YKn~ju53#9)!_2nPVUzapRrn+BQif1+>6JGxDoI~ zsQQ%pKG2FBnk^-%1;m6<%*@>7s!pnfnQ~)wu^JlyFaAn;_j8{MX!nHNgecG%XGRee z%oOlnd~Lef{}|1}D6okE@i7;r<*^6Owvrn`BK2nY=3Pp$ov_){X!E9 zO-Ao>&fH&#^8~c*7Znrvy!g4!bFWe=b6#YL8_*lY+1!oz8Oaf(bn@WJN=sAm<6>y& zcZ~U3D*}QNA35aXZ^OzFb?2`8bUPPt-<;4&qy5u2o}xMF=Z2}qy3KMFbT48 z^tpkjcAWYZpg0$$|8Q{dVnwtl_z)A?y=PM#y4_$l$bPoiD}4a92=7xFHKKNVA#rTz z4+3jOk$ClINiKT=!(#TH4*~9rLER$7fpG%iPHz1N(!vb6d%%_|r1cvo8qdhghsa82 zL2pkY9IyOPHZ{ji&)MPos?sCGX`;f`K~KbMEjc&sVv*9m994{GsLB`_$=YKiM?2gMGB;VY8GgCokL!=lKh_=>r8PYMjIdy>A@U8OtoE z4g^khfIdL%mgUB8rStD0g2Bvbn3Z>2ui}_8nbheD=Lpc!GnEW@+~U^h{xl)5wncS_ z;E%+LMV}U%67$sg*aDxeWsVBsKG-9i+%3^Yv**4LLnuFL1@aRLY#9o26B5Hk>g)~t z`MKh2+1`ERX&;BbGuDAB;m#^}(J3p^3J3Xor`wP3h$U~XQ~iWRGw}J<0?Q%}PHA3k zdpGX>(zVA0Kg3{_U%&onXzf_h<6RL=MZH@h~eyEA38^Yh>) ztfV=yTc1?3=>D9d>$*prQ#y$wwErH*nLS5BVnY5=GCfSjd0bGulJf$~aVeZU=e@fZ zI1ED0{NcFIa_{&pj=qxQDX%IQs~L~!(-XpO&nC2U7mFN`KcPX9fXD 
zpX%wGtrW{Ai-ZVz8^==B!lO;2mE$i23=t1iuNYb9dJi;TiH^EW&Om&ho_A2gmn?mc z275DYMfBsVIbP7Ocf?I4VCrd93N|)Cgv(s?kI3Nfi*hG4E|(JSecxzIJVzcT$wI)! zOrKU<=rGise!cw8n-jHCB{>qc%XyVft}Bzn&2<)8ao-l8{EE*eE&ZX}LirRSJ)iD@ ziEM?J(cWL~?N9Txc%PsUel1yI|7CtWGnqrdDPsCZFMccxPe^nu-F26dL-oZ&gZBAi z@!ub44U$DycB&-E+q9osuG;C5{7hyZx9^m-e(7YFME?oTv5q3V;Q4>}^-H}mFt#@~ zxpi`xvfnfDmGh6K{bR*Og9Y4CO&4;Tlw#^CK0fo#lV#%5J2Uh&s_S`^y@c!K zd*)Mef?)!rCJaHQ!#$qCCJ2!>`5YTN?&R9-;eTYfxgcDY`}4T5Y*D2BlD9!il<9urD417+=Patkg>+{()H=BE;!1)y2&1QiZ8b!lu}e8Y(iUUF|>7WSRW{BB{NCDXNl%G&6fuD3o+eV>n?bt4VW6pRtijJ%)S11z`p zxX92t!Oxj0=Np-h08es|JWk-efUoVcEdX(Qo`pQ>b6uQ@IsaO6xX{1GA&Wgli_dnX zr?tdurSR2n#?7sR{i|h$x%Cuf=Jb)5J3rKVqBf^}2`GA&v!Qvy|Y+{qE(3Z~1^Ep3T)oE~mI-P<1 za12a1RugEb9c*~j138o|C%dq>Sk{iIM-ME9oQbJU&*~q4neGsV_eV-4;S?RkHAJ$i znDI1VUDK@Ua!zXIhmrWIFQsHBFKq&Bg2!G~3gZKd1i3ZDC?8@X)gxFUb;P#2o~A6<&S?JlKF{ zRl!tE0*1PFM)fY@lCji=3XY!<4n>j%y}M-pX=_(7Nw)) zm-EZ(OdCfUE$v;EXzPffw0$r(^C2C64RJax)2_-u4K!GAacQPAKw+#l-mkk*$_TM# z9=GeQkb0Rq1b!tMfphfDc)2Gc*=OwPZg@MaEH4F=A3lDzPFoCmKm*jwTQa}c=w??| zyH~+r?>!1x-Kv@D*~?786%b=U0xrVd7^jR7F14?U+?ts$`k!?~#?-al>{e`*ZJ7KT z%AFY3Gy^weZ+Wln_Qsc+F#UZJKgC~x#@3TjmnlZ_3bJ~8QxOJ0VEaPT1r+PKENvm) zUynuA2{g0d^nDpc1En3i%3w6_5!I5;)@Rk<-1K{3YV_USGDofxMiqD6ye}q-7(9)V z!4F6gJy9 z4b~^IChZ)(v5$dC>Uho1nQ8X?m8ZI1OY>7tEpHA@=a@JD6p?5(y8P<0g20&FaX(%2 zTK!RA9AtxR#FOcf)>iNA>0VE8)KwZ=Pp_P=PtlWDiHj3&DlOEqDbVQ7J~*-M7&*c* zN()aTM!QXmZ;6LfoyoP|A8}OE9W-#u(&V76E|uYpfKhGkz|-?DU`plTo`NEy%6$Xh z(_$5rem-Uwzd~_ONLp2_m0Km@!H}8dvlGeO$7%m=3?AV%dgg~rC-!nPgVoLZr;9$C z^HPk|1)2ci-mqIkGj0pC#AVh*V0C2fXmu6O8B(bZ77JQcG!$^KDMj(<6gVo#`Jf@_ zrX7WOyAGVqC7 zgD<9|Gt-H+Cz%{%_U%Zy_VK>*^E0>Y1*9n@7Ol}Z?0A?r=@^b67vHF=)$?qxyFi}( z7nJ>;JU{E(@|LM&_9uCB)(!Ow0q6U^MULn{bUpA$XL2IOygB_#a${rPb8gU0( z-}cM~{(Yt$`1fFnx?;sKnu!n=kAbYKm0Z$3IAm!BOC zv(RlJ%aVWeeMLQag!`W_wTViH3Z8@9mqE@%?@GRX(rcZ9M&c}|(KgQcMDf5Zr>x}_ z$k`gUW?M`v5fm-Vj0JwL25R)LFwtPx`ymZOfDJa3Tm3$z(bM}e4h5mW82rsbXu_Ri z9IU}yYJRCXdz}Jyd2E0wmds&ui-=%UTkke!`q*`c|l*tEIb>S 
zYCn3|q?Mir&}*V~B%GboQ;OsxoGD&4^a?*tc+A_oK$fQmW=W@GEfXCap2BGmtqgFR zf3Ww&FJK{f;O3D-mt~oAhPhSY^Ro$1@ZD8+>dZEDA2SJ|vYVAN<&Jbe zs*RVED{`G07KltNk9%ceWmdGq0hm?30%6uaUv@UqabK3#%ryDO;M0R3GFrA=-8S4w zE8Po@>OPfVx)fd8K{4kr9jHtCi3kQ(b$X>yzAsVR(RXY^ML&MDCz@`EpSKkeso}zO zb&WMzzbyQzp(dwxZ%W1|!!f4hDCrQVIs{sT=%4qY0%rAPX+*Cszj*|MhEIK)nRp=W zM}OXbpWrXZx<8^}Jy!+>p9|nSev9B83WdF{v)7#(Y#1q29eYJ=2xSKC{Zd|#(HmwC zL}Y{tbdM@5Zj9Pg=8dQdtYo(`zg8TQ#uYmZUEV9^Y#sOUT2zYg2f>f`RZOd+v3s~w z%S?sp#Cf6#ibd!=Qo!Y2J+bglAt#2pHT?7K%m2Ws+RN}lB{5(RuV#H3Q!u*{J;nDU z8M_Xh)`l9kZ=CAD`kSl{kmjxKC?g==WgRirw6|sGik=(JSVXzzEgfHeKZ|JfwYclF zPfMJCU%QfR-YltB5hbp5XV+fa>P$m1n*!(OQ;A%%vp^P7-dhoJjul0{MCZs!V|KlChiEZ-IF_aWLRWX|Dx!1k_z+#xV`AtW{vmhKS8xIAN9 z`VIAbE?~5eG@6-B@R8S%1vziQIZ~ zKg~5=#a5kXXWT^Z1;?f~{kq(F0Zi`I_rxff9sD@w8 zOTWVcN-ER0xCc94si#=9l0yG|5wM{{0{(Df5_8V!L6mM#7_?t%=7a1qc94R@;Rd4z_zCJ}CG5;derTz@rN zp_8x^uQJvR`atuJ3g2ktsd+mL;{{}zdLoR>S_mrHBVLEFxa4%tO!u`jl$>tVZL)N^ z)s&)VMc@mnOsVCl8NV`#N_~?h3_3Qfj+TsJkjpf=?@!4jQue2(JUF{ZUm83gveOUd zN{HJ|TsjwUfYbG@42h5YIh*~jv!w3rP9C`bC~XPGXIyB{cj%rMtEu8wm;xr(lw2Wc zzPlz?j`vH5U}lHmXQsQ8`>eHpdl|cf<7NxvVZQ6RZhm{ZRmd(Qt6Ozi%cse;qevddD3P}e^72iOAf;jRR} z!tpcmQ|aK#uiMqq6|z0cCB9;4W_00^=$x6+mMrdJDoKl^Z%xb}p~Ax{bHGMQ+rMeY z;Cc4Q!#E}3sF)lItJZ9V%*3k#(j>X9AkOl0CufG6X|v4~%Bd9|vWedF>JpWn z08~;1(dZYdNx3h=b2oc5PTwf9^tnXLJ>jEYpe^nZn>pyk@VfrKQ1{Y)q@`iX z0ogtot?Tc~P4i9t>x@N|+EkwvLKgDTf0Pnd^V?BP~Z#hPfht0-8uqDR`3!#>$cmqz3U@_X$;htCWSLudvp0r(JyZjMJMX;y zJr1o(&;aP2$GE${K1l^5VLW*X1$20ID#!I}MF=Y_78>6Z#>FO|Fp0i1FZ_5|InTBy ztaZ0}@Bl@`aPO@_Hz2{hX7=QKBuy^l&hZnRvXK|<+me*au zywS|U?~Sqk;PAfa^3>9!!D~lrZ?zZ^Us9aveQ{_*9m+l_B^?ny%PgwXW=?|Tp^;T% zZk_x_5vJ??{1A+o-*FBH>7!WuuQi0}K2st~U4DDYD3epLW_20qnEhx%&aG)~4X^ZN z2JXu|`mTfYCW)=L0Kah58rmEa+$Ab3nvYqW^|4fBVE!{2*x733O7iK~qt}pnb2X|6 z3wk0n_}OSjQ7KB*)+|*sJ9ZX(#9G?%kegYHN2m+b$yZfr2+`H0rxB-5NWncAB4SzWHLTAE+3 zyfbz*$0x4u#K5xr@g(;)Tu6uuVS4a8*(2+^q)Qx2^Q%m##1_)0T@-id-dp6= z1O2sp{u=%Pq6A@uS>Wh<|LuM@rGm$E7uSg=Qg$o7DSS13ILmj0tOhSX5l(zu>}RN( 
z5KEZ4c1454U*PRGjmtC&Mb}&x&S`(H8fmJhz)ip9?tx(jK!k|1JVzoa3LTL@H6?07 z1*mcc8Z1d7`=CT(kdwmg(GQ|L-DcwLv`93PXd@k z6AbLk2{!h=>;jm=D=r$%oP>*<`mT!~LkfgjiXI4gWi|HgA*~T>siC)PwZ=*tTq!C2 z%8ApjJ^Ml1ZthRdh`z~uw?wnFk2FrSm6IW4ox=BHIAf}csa5l*U56kbo_#W4& zVUphD+lK0U)jV*<3s7SWQgzq;W#xT-iW%poL1H106!oVnV92F8{kk;CQk~?2K#pVK z$EvYT^SM!5QO9{)&2Od(9b!wrkK#&vZ3D~?Mb2S-DVcsI`_lc50i{R{C%C}1tRe;f zoox`WBVwP$9}W7j>$aAD=jSfzyXKN9O1^l^t93`KuvB2=H*_T#(}!6qQ|<|S{Do{^ z$h{nr7sLz3RZ+Z43o{JCT4F!iFP<2QfAyASq$ufaiV3mJF3az2fB4u+tjA41tazH2 zP|EkmH(kt~xgs?yM&9oo+u}0~=@_yF&o3D!ZBz=nj-nUCiBqPg;@@5iW?>zjkJ0%J zIL_T33Nl)gQ=+!N%1jsnZwLo7NF-1F)H&a2pd|MDRzJpBr0iP$eb0jfrS5Epvf7}g zeu?T`c!|fLph-0|q}%)vtL2>W=f$?FF+DzyQ4V*OP8~}UMH1qM0>MX9x;5nKW+&}> z<(pHl`h;a(x6z4nxN%}L_Qyu-;m^7Ah_%&CSkHt8KZ;&fEx^2efQX!)5GQzpzWOa~ zrLS$hmq=Ljfa%_9yq%KQPnf$I8K9P`D0os_pFr+04`-@pChcPVG8hM9*O^fZz0Z<9 z_RCEMm-Fohyvc@%?8hq%>6m1zCm&OW>DJ*fF$x5E)GiLT7dYjNo%N_Pdf9)Yg-}(8 zB&WXWHH6i~?|HI~0ZMl@Cou)juBOuST*Di!lhG-GKR^0ukuc3*Bn;{?5-Ttp>yLNdslHjJ z-Hrsu;dFpkVzdj?rbGEg}#?WvY*d=6QnAb zj&5|X=sn}J2nuj|1z-Tz+{$p6xRI46En0q?RE6lx3*#O|V64_Ls?lI;Pe?FcIVHvCjv( zPw5#SaQuE1)p-zWYY$*2oOB*P-TDAs3h3>{j%M!Mv|;D5f;SpvQ3M8=uS+8<02rZd zu=OFKdJNf>@;em!<1RiwbLq|dGuy7igs1H1+^12!do!`qM-&AqtrtGu)2>;xWS$2e z&sYt01DB1zqdgo4{GK`00Ycp0Wf@-~60+)uP`(c!n#1u^D#BaWD-JEAnRXK0#=~Nm zDY5fYL!Z+wPrix(kZlcL#iMPH1sxjAQNnu!{%&p;NQtKZ50-N4KviRtfch_O)Hy_dl7RLBBX@k~OG^1vzT_bopdc$MSsMnF4lZHzQI zC#05UZ-?FAy2VkDg&i4qf3|x=X0UCv)d!m)6`$#qmoqbbZ0qaYq6N-^Q*;Ucx0Jy9 zXB$%zQT*Rh!VXEgh@EvCx7&P!jrS8{g{;pB-eh`ux`l?(-^^k49 z-}tY$F?_(S_hL2m|8*0rHU7WL728*%^aOlB+>XP9^#8gE23nVzf2f9SCZhiREM5OA zUmFX(zBtSgpggR(wxol$Ed-o_2bil>nYUm+y!nV8^XVOh;j#gIt%|W5&K1_2_Isg= zUG~WC1)q(*(17PEq=ty^AMB;rbN7}n{pVK5hVDp`Rz4zEw6SO#U*CL<^EA;ij>?Ql zgxI#-9D2otkh5jWal{o#Grz}2xzIuY!=hr-ML9Uj=V2s|%9$z`Z|^6B0Lj8bF|9={ zU=}$6%|wu*&d0>u@`x6HzGKyRuHLhm#afn`n(?Y*f=>1@^#XSF0WjHQ^5H@XkmO^7 zycFe#W};%GVW~HsaxQ;U+*uB>@sQH$=V#+@G$p4yjdffsfbn*w1I-!6{lZk+UK7hf zq|1^e8=}m!wm+DBqKc13`$-F7Hscw|VTdW4@h{NPT- 
zpAz7~odW(`!#}^i90&hU7=n5WEs6PlR#b_P_gYzwvK00In+Sw^Si#AdZu}iBxV9y- zfd|EZ!C;$ExW1-8fIPm!*hpy=K}f{z`G?S*9c2nz$rr%MpvqbT-z^9jSLH!v(cwN3 zY{RUuk^2{_~v&v|n!S{sRXgrBrJh`zSS0s5QB1=8ZbAWJ|l z==JHrUgad^BQD@*ovm};e%fwrQwDKYqG3mboW>y!ZJOi$j*Ptw3J<@)uYLq`Hjh`n zxoSrmr`QF(N!gsXP&hR_Y1>tbS7WMx69@vM5@-3o9#!r)#jhA?$D#K!-K(;`c$D91 zR6FVN8=M&MNk7-g15~;>-!Sx92?_Xvl%n1zcr>08h!k-^*>}p3{N7SX zE#5X!GzTB?{neOWN+pyS!)9=Wca&NB8?`vGOqux{X$XT;TkPP|g=o(C|3Rq5e_Ho3G)0AYF9F$42a zb0&_^Z7yUpA6#-F@R-}G_&sF9dM?eMhfGZmtaG*&5BMIt4vJ|2jOZ{p_;RaH+{_*A zgBjF+2%pLj&AVPspZDX={$cMVtsKkB4kqKOt?Pk-pq*vy>@Z7Wp*SWB`QkVT!k^=C z0;bp8t~w2WKej5S4|6e)P`?hMR||4>@j5EHRYtxcc(J9P1n(pYH8JBpo9ij7cY*bV zu&H^H_>lWJw*49uV1GfY(Gw`=$wJPfu70PG4`!4MTvi!anWM#=p z^x+`X5$yoter}6mE=Sj&!!w#$y}mN*I3&``VA9%AcsO#4E`ZBs6r~o}5A0r5kFt@a zfZgGN0}*^s-1U~wakF|3Hycx+VfnYpSb?hT^vW^AHTzP_!w<@QKS%g!4_A|NAQpPv zKLLFuC)QTtAhy_J;cLay6Fq&JWF z*0yHw2ozU^=I>IJJ_BvBdjQcDjIIDbLxUZ0;v?4%9)qgKQi!@LCRP-pdxe)U9)9i_I6&V}K~7l94xL>?SG@eWrU1o$pQ z0x3nk--@9gekRzV^s)k3h*N1}wfw;?3fSA59mLD+Pii&C>SoE4J=iDsb^2Y@DVu9A z$O6Tu!zv<0)nBTotXfg(@jz6Dl$vN$0xX!KaL=CB@rGCGfAFT%6-UE92`VRG#L%P#{g^bw`owwg_uQ#o(b{i3N*f4wmB{1 zx8Ou59U$Jf>J~n5!?KTf>EI>4&{jN0mihEnL;9q~pYbUHopG|DT>WbLxG(ooWjd)s?O!i~5hL>L= zgzXU2BQWFgWpK_U$W1&KLHU*WpX^|4@rUxqSIY611V!6b+XTqj39nryt12IZBNQNU zjK2^nWp|B44)QI4H$9>Z2-U8`%dFlQ%AFw`m)>~_A@5l^-S9j&vRg^&DN(2^PnhE! 
zP}^_16J0>z)g1mpF?@pyphM(fCNTBV*0t&^qb>B!@Kc*p-&QN}!4~ydxdVA^F-vYzpCFCD_V8JV|7$ zVpoX|j%rR(s-Qj-56m0&rQip^tiY+&&BxRdp}J-;o5G{FJ|uDohp}|XYcUOF5O;Be z+goq}$a3Q#Vs2rCQt(&*r?=C7s8InpWo@B_%gTHKrn~MzrBi7YKa`;=Ze!o)zXN}n zwjc0qOtRVMgV~;cf-15KAUI4CecAr}y(s*&dpF1j^`SL9d7KvX5}pPV>M;IEe-${$ zXU_C12nkf2R;u7nXJlptoNhaqJa5X}}9LuF$`FbPDE z>*uITP!N5=J`Hx74~3oL*(cghXa5Bwczf%CT3(nctFeT&WtJfvEt=6y(km%;hdC{d zfgl;Y2A=~$gIHxQz}dBv!nVVnl#ro%wXS)ccC+U4@`6W{EZ}1Wn19&4b~@-0%|@JVk!w1r#)2M*V!lG0#|GWj z_ks5Fn5iX2IEtHh(^fAoBL@x;wcjsiT6LZtXv@nGxX~!zn5+}7(eoOlW@E820Vu)= zBB-=PZ{tZFcG4=U{`D0V#CCVl4)J%0I%|dtS*&Y~S%qkDk&XjMWdP*;#Q>n5vhy;0 zDfZL1ZOkQyPDIJc3fV0wz9~c8P2Pj31<-|R5h;4 zNAuB(vF9k}09Ykm$4yjzi;UKM%v>QwYHJ1)y8=rbu8xmxfYBtfkIsMwaMVOeThI<( zz!HGXw2FcdLfkTahC3a3iqQf@0^wd7-ghjnb$*KgJ%WWc{tRlmU2S_!D=37 zgWWTjG)FMtGF~hgr}JQsdVmmse8d^_q9H_mwcydOKW-jZbT<)?cJ4)i9@$jLhMfb$ zV@^^`q1Y|4<@NJ8;GztHA`2*+0Z9=^NZ3LrMj6Z`H_y?Ilcm{v-0&{y5ddZRq_;6u z7#^s!6<@>?9VTg#+x3v?Sq;pjNJ_!zyvo_HH1zkRP|F5^#oqH)nHSyC`+G+t#eSg> z0Jq&ag~GB#1l-?n0#9jl!TiNNOiEqQ!a`Arh8S9m4nx)O&bH%+)rOpHB)1H13F+rr z9uyI{i7tZIW|E_wYkhU^DeGwYr^lrgC6*=ya_&^34D|9IXW%8i-uU{8a+aA|0Rc2k zW@tnEa124WWoL2wGg0YRt4Ct(!S9|08*;$p^{TXW@uZqHaGAE{ra=HG{`~eN#CzpE z(A2+c-SS}U@zFFx2qYM4!O^JR2~J4*2n3WpTTN;i%r9xqvkqyNL5fZRzok1LvNwO0 z7sh^@VpJ zBmx%t^=nko#=OG6(fB$D%3)t&3ds>L7O9o>ZQonS3m^ljYauj(41zm5^=O8}hr2XF zp}~~Va$BUASqnMILqXR|&{+^5+qL)jhiS?P5eG99zd1LGJ4WO1b$EPEeAk?E2zJjp z6}W?YYYTmk>X>bXhBOkE2|FzQv3SKiGlj0;NjMss%-6v z+@o6A!_i{Nr(2HTP>?Zu0foN7D1ZmI;0z47w0}{Q`+{I-+hU^Djb?j+W`?!JgfKG$ z`{+wByKnY)fE}*}VUPt}Q!zX6lju-$m>A$4Vz5K7d#(O3X3@zOVv80$$CNiXPU+t1x?SQf& z6LB17G+TkNP2bLV$h%-`Hg@qSlq<$$9!?`4&U81Uux%X0U4)~1&U+fV=#N953KE8z z8`1G`){E%$i8-hCM+c&!r@uy#YnHanM>@x@b`0~wr+3oS;PqE`1pax1x^c+vsv^-a z%qRu6!FW*DNbNPJ8Tht0pbQNtpkF|2QP!(t+=;%ax3hni2q#(qvi5wMijlum_W{Hc zMkbmpRFKIEEgO982UlBH#Fz^32@Ti`h>)Be!Uq>EM`12V_J*EI58eJ*MhQ>%7dSYC z8jw$77c{FUT|XY)IfUR8a*0H}g>-N{9L#JRZuR64Ildy*e5bJ}YXgVqPMA_5`os0b z#;YLVGXY7I;r?igv_>EykQ)e!&ZvXx^`t5U57`!6w$JJN%D1`;4>AAL 
zWsF`>d7nqt@b2sCi$~FDWSNC(L(5gV*NSJ&+r8v;T!$ge=9ljQW;kCPdW<2|eOE1l zJ7ZW8cI>&X$Md~wIo$C=@7`~XW49sQqS9OSj1K`&E5lE=>Q6#fUc96a@uji%0G2l6 zvmM+CX^7hh%AgE@?-xzRl3Vum=bqU`>J$Ctq#3par?~Z3BOkm#GLvdoNc53pnE_T( z84zhc2|ZrvfSay4*w#{hRKuS7Gx&vaaNc}^*HHak26#o~=Ol&%9aK{zz{#7=IF0j* za&`I!ogeht4a<6k&S?=4o{%VXZ!JKdr+2>_)uTh!7k(GGcY19Abli_@4Jogh zfw!RNsa&AjAx=Bbb~UC)23Z*kmDn#ZkHtV@V#*Rs+;o|e#L{7i?b6*|7iIcLllbq_ z9p9x`zM$vk0J;t5-t2P8`EL#3p**%~AShB^aS$o#BXg^27ed#{6% z?0M{CRrV~-A^UTC^qjBv<@LH;zJI{?mo75Sd7Q`NK5qBxy2*eKlO5)5ur@L2?+I!e z-Jo!D+|kS4s8a{Khg(=d55XT8({!%i=9j^sgji3Ggp>dR zp8Lft2^#l_k*(p+DSQ)N(gkhIh^&mR{;-9Aq53?yPeN40`Nx^y;>#6Wfc!3Uzj=?3 zK@dmr<83F)-V{ka-2ujt>GqJIkl&Z{$17eq)^kw^|6~dBfD4Z2-Y>aVI)~|s7c#MX z0CGPY#(qGan`^m$+k=ixKW_jI#|o|3R8j$_>}ZGv;`-0412Pr+7p2=k0Xs2Q0(1E) z23b~+hID&{aiyn_&%%5ZA1rIeV-hyD`94yT5n4h=Yre4lQzty;XfO&D6>;FD2kELV za>!Z-B!=y253&V`c)8tnExeAbp5kuDEfSXR$}*p1Zj!#>_pdcoSsZ?dcbXM>fD>n6 z39Us)E*Z--a;)?p+cI-O1B54;Y?#i)pXoMX!2Sel@k?`!!GCSqNq$ga;j_pi_Z|+^ zV5wm(djaa@_B3$IPK+D>VLb@b^ryXH#^QQ%3TS~gSbZZ0y^T!%THB8RunQk%#6u4` z48l-GEXy#pqOd5mUY5>H!r%A9osgG78jzxCNX}B^_;k3}eb@pzigduCWb&fh{(kxd zPO^R<8J!m$N=o4QTjQ+=#J^%LPYHYQd5DLM4mtg?NQ&2!zG3)0_?!V0RN3y7mmZq` zqrw+&aK}$O^K<-s>PM*LgYcn`c^H(n88$=Fz$pXHqjY~?{rAL~vh_#s4M5rxi|=t8 z)M~@_8&kZK{f|+Oc^4^Z4zHO}0Prz~4+B|2PN^I?opw;EJj!{<%fRrm2~w+ziNm>n1=de;5qqh&F$-ezsH5j9ss?@XGz-yE({N(<+03dTyl@3n10-o?KKj=>d z5~wn*d_CHmkQNp{4S6#!y}UL50Yh3K5^+Spy=BB0aD!(EsGbIAYQRv-voKy@XgWt# zDGGkgRD)>3KPJmd4I?soU?1>PIm~nFEa@t zUCm+}bYU0Eui58>K{a*t+xiulB*<*aA%G&uRhi=r2!{B+WgwE#Y*>ds&uIU-_#jhv z4xaklp4^cmaEb35pxk4FO@aiI4@B;{{-qrV)FG`oNBPI)wUqjiFJ@Jh1ss)H9Vy8a zkFXlb=K`Z9=hb+Ue?NT$%AOqfVsiw-?!|9tnqFZZvN z&wmd}#fd-f`@diRkKyeDlWSY@(SNwU|9#0*?C=W- zg6DXD1MC0i^?(1DxgIuzy$izs+T;K8*M2~RtBn!^Io;nHod0_nVTQ0V`J77l7ux*C z?E8I<@XsR%el5==Cl3DCM)aR=(tNx4|iFI>sa<9Aru|z#i?_%9S$s=Tc62$GX0VWcZiNf_Vnv z6%;#&em|VP^M%$!+?JT~Jm?wnHK~&Uj&R5+8xWi+3A6`b1`9Os8a~edmaK@EgYTV1 zH-zQ40&BX`kEn2ixG2gL!U7Q8(tSXHLP(2tkC__yZCTES;&J_Jmcp3=+p@V7o;t2V 
z3iyMP8EEto`Ea7o4*MeR2`LoI>WIn#X~}cWspb5~R^aR4umuvY&N9gx=3SA%!&GrM z%EPLgC(d85bkyGu*~$^wDjaHEp7n18BDP(q?$ZFS&u`DLHp>|8_Ry&gDXg#9fHDQ+ zybTOSE3#|5nS*?b6|qVoXL{o~FUJc7zC4W7Yh^M*4}qQ?sv{mOFkBmLK%`W-AwwRZ zWZm^c5XtM`sQGFZ!cZFfs^3H1jv<5%(G{VL(mgh2jc2a|1yail85~4IW)@wD7{*kS zIc_o&{H|f3vS}jNYorisffy8ev|zT*Ewz<7VJ;0RUa!URT8|RM!JQ&8DgCf7O{V<& ztAvW(v#y?!!UA-CLI-3mo8jQ+?PGM0cz1{^vpv+dz2@s1>q z;e@@}=Bmdzr)vr5{dUF=D!3w+LX&3lq1lryZZpQLu${HcZY?0btdg(5Hq&mvrJSey z<5F(aEHYesa93TzX56~7@em~I+xUJ%^*rQiU+A;lsgxkZG{bGai3z3ro1rgOfi;Vt zQDc*xC5f%r^TQP=T8j;tRsA{^G^dxNjVVt32oW!@Q$h^rKHN zGye_bdtGfJbGXE)7kmps=^`IM(@>B$B=_t;?>7O!pfm$cSAftvph0-tB}LO6{_mhZ zXahWg9So&q2%v2T9V1_QxahmTe0S`Za>LRZ`jHHBxM+hoEc&%YT0@@S_xt;c2^z9n zkOAS{+oet4x+JDqG^Qu2Yj{}Y!8>9NqwF~><))WaUziiR96^Y0k8*EbNXvNxDQgvN zy;4l^wZTm)kexTq0^9W)gk40GVZCF!2qcMwZ8-LS;W+k74Y`85>TB?{?`n4cs(xo7 zHfKpIwp?c+4hYH0R92mZ&57V?9Me%vavo36!VPy(2*@|&BkMmg$<^WS66b_l`TZdi z@-70<-!C=|M9pl$0}DrTFE(Gh$O<5JfsKig@5vSte>Ft`Nx)jaR~GC9TvL+_7!W+7 zae&~&?+_jXY!8Q55-x^C;T!lQvL2s+XadW=w7|!ZI$7+q?}=b+7P1#bCVwWQ)vK9*;fLsW)`_ zp{7vtJqj-X&G~5%s={bwhhOjQ4)-34_zbvw_km?}!V$SktmJ*zQiWmg`~ocf zIHs`*sD3atx4$ma6{`s) zd)~$0gE##JuQdk`gplw8-B>CHYYfhtS=asfkneohI`b280FWdP*Uu>2mtqzwI2L*e za%qtw{ga9r6V#;s=T7z{gh9$eFoNE@Jl%>=5*_(n`LBdd0?6=C{v(%n3rHz$407|o z2B~Q6o~S|vA?DxeO#fkc!RFbfzTcZC|t#R*u3c3SiI=fN=(0n6)H(YN}89XSN6K~yBbeGIP+LB7y! 
z1Qi>q2XXJRK)4BE2R!pM5{HB^URjRyWIrFK1$+Jd_{@4VU^!NYsGm)nmNHys>;lnxOmIzhlfn`LIa*`f7&|34@*Mm$5} zVz?A*59oD;@F0jjXQaa3u-^(|wSaIfwdlQ#G$zg(lKb)m%KRue0<;3dg9I1xA1Pp~ zETY=)vw;8=c>N}#3X}UcV;=&lr-Ug-4&Z?KUd&&Itid}-P62?kft9&L`7bD#xB>op zrR@$xGm@VX3HwefsGt~&?CQCb@Pa`5(gvA-Y)GI;e@!x*CtYL@Wj3G-69xn?ON$f2 zP!O+J?6#ugRdlz4L7{i(J>w*7n)gZ`4f1?%>So$M(cj$&Ae%ZAAgq86Mk6shjHKRc zi-dK+fsB$$?WBIno+sG@N#Zm}9?QhUQ@mH^t!E+mFC1pJpaz3uwZRlRX$*kt%K*^6 zu6*o(=N*dRpia=veq>A5LGbVt*kA$9Ti#uV^|V54s?k{JUsx1>cfH9wdeZI{yy~XZ3nc4WyfPp9gCVslc7Yk$q{ajV)T=lo_nGP zzf5|&ACrBW7YO$`lamPk%_+%4?5~BUt=rFz3xu1SRzt$jsZti({-8M$nNdDNt<*VL z&jc(CuHo*MDX(hj@C%zFd2<6h3HiXr_leSblbtdD4lXh|uZc6lJmavss`D|p_N#!V zN52X*Z7cRz`MuSi$$40n%2tpLw=A(6<48z|YTaWGgy;edV4n}$^!Qh3@gojI0*FPb za}(T`whAzW3$;s`i$w>F{rdyt{-gffqbZXfo)WcC`Ss?1LeP5gb%GA!jbj^gIvGld z`QhBV=1)SQ5Hgts#snX>N7shEm&eLDhwC04Anm*TAvKND2w#-lwyp}Fp%Qc*__OVt z>??!be5kUcVAVbm!^9zDTSFq-JaEtX!Tc~e`ok5Yy0N3Ig;dVt8B(4N1fC2_inAvu zg()y~@R(SgZx9dQ5C(Ud-^4>C5!4)~3>!0uB^n4QDM#NfjJqpioL2&)#e6#goo_dn zLp)H(3pBx(qZt)4W4NW=3B3mA+)3N#RXsFUMO?p4l-GqK2hj?Jw8sexr;%xDtWUZYRnHawKC!g{J_wC?Ni0?;$Mu z{y7>SzLeH&pJD*@6vgR@Ih#j`M5p9m1T;XMwk!~|bJuIHqv@Gnl?BaJn(-$Z&kjKE z&Tk{K6aRi0f}Et*TRwZ}r18(hugz>ctzE(Heg_5WQ*Wh(T^tmbVisFi5o7Mp#89PD z--1GrVW|Dk;HE^s--0}4kI7=xH{4LYPht$SOaGVc&JD4fGX9e15zequ$QVkn#9`}yi% z(@tK7ueVotX?{j%aN(dj6(hX&D@Uq)^XWA)f@Nye4dd9-2a0YsYqOAi*8c0~ciXkW zG>wCLY80edJn$KlE7eL(9k0u{gfPu9JdY4DO+=sb6oP21;Lh68uXCF#ViEnE2ir)( z`lw0A<9AHFB5&DZ%@7);7_y|iNOi6(d3ujbU6U`3_r%0ah^`JUX?RIh;(i2WknQNK zLNEt!klDa>gTuYvt-OikH`25~zr`>o#4vdf<}5&L)5VZal1adb6Pyfwlct?N^(_eDOzZC?r>m zsqKF@?(+84S+;ZHP*<1Wr6UDZKepePXpp$Tm;gsgbc)#qz8Pw zPzEdaF)+iuZr;ze9jIZ!DRui}6^9{?{5FSr)=6tVfuh=sPkocsNKl08mR4xufj$Hl zUw`z&D9)25oBnFlf zc#6C4f>!{tousKX-1N6W5q4m+N<1#85vECKqlGXBM|zoDTRG^`dN7NfO(CWl5E%** zg%R=?9F&a`t009CSWMqxp`di}^&T@Xo#0 zW!9Zz)4|^58+yeg>Wd|INs*k~qI)T_lv{FKJsnNS6^jqV!@|p2JuJ=nv*r z7UbVyFS)svR>^2>AY&U8k}dx8xp=ifjff|;Z8U)=S#g!TNacBf08LzKKmq z9i;&BSUbhZ(ALWt#0eti*WC43X7(Af6(BAuZGU6uvDkaIN8?vVR?yB%AX<^(Rd{gy 
z)j@*ST)g|T`3#6fSE_h#w+=TyS~?4=gZVAxE0&@r^^1Xn@M0oq#bdK1{? z3Y-989D~+tr{}t;pFZYNRSPeJ7JlgloSzStaL`rm-Es%U4OMno(FiS>XYG*n--zXHk=>* zg(=4ifDysqT_0cU#rt%ct2OIlFMpb|cm3mo`6CX2XF6pHj^;h*Y;92~@E?36TPzy! zZN!at$$}zJd)-Y+&0M(MLR1rF^5h$XzYFfm08?Yp>w=St(g6g41jVQWQL>8ekCw4s z^>7@KA$>p$9@h{Vc5~-<#F^>ekUYHO5iWxZ(Hl=)Vog1DJsl05uds2xWEf*wLFKsT zTiO{_9C_%lZF5*aBZ0$D3Rw$lV|MFNS#v`dMc6@}ewxi1oNJ<>4?2fQvublxi)j_; z>?j$v)k2988bZ3lZ_dkfi7Qqx3CU@h=rsx<(!(~V9t?2?kIG3GoLNlV)b8~UyATEK zNqa()#(?jT4!ZZAsnaO%qpWvva$dh0DbR~AX)4i-o>Yb957 zD(q~D(_U7D0Kq*(o}ZDb&Q@Y-t<(r@OXOD})IUJ=)aP#*1>Ou}oP94x=$_VrPE?X7 z|Fc@D4{AnG>s6wtFYl2&cQMEu<?RzhlDtYQ+A+G&= ztEgdXjR@J}^qgO){cI;0EtHR-l5}(+O}X>P!|3!4u%t5 zZ`pq}qUm+Ead)1p5n4NF0xw^}1{D|dd-m%#p#5!bXg?*ZmJIk!hp|B3zK&FY?AqaNTS4}p7&9%K*oE!jb6W6+A(qLAb&+)h5h7H z{1KfBqQ;IRPm3Qk9rt>Y%tq>o$2^SDt^c(#>)VN8TG`g^VIT!en&ZQj76CL7| zDGMs#6q$aMETN>{!L03ZGm0;?(@5V9Yt=cFPa}#QltgijWX}^)h!BC}sG=eQtaHy! zd6^I|(Se>M#@~AR8N>Kn1LofEdCh`)y`;BC(YFr`P1n&3py2#^roGzIXK(M?QQ8sV zFwb7a(7f3lv@YwoEF}G{2*w6XaB+pxewN4LvQuNNyA4jrI}{;J@deE(Y&-MT|$O#*6LXwHwzrr3_q2>*dk$L6R9>opBaQfahTY2;yp*K-Gx4Zmz$@T#%n4RJTeMA6!ep5gh3+Ed{y_jm`p&Q=TA5h zRj0VGSQJ>JE~ncb)8eJbo*fH81`M5DBJYt@#@^Xu8h234xpU9WrtC05N}<^ylc$8m zj#tF1D?s9q+RU4L&;3UUVchg$Hd5V6iqJ{|LvmN_17Fe(l$X+_*`rwQ80hxUmyn30 zsZrFt@8`EFn74Q`yi+3~W{%4@MyU<4|Hw^jjIDFP&iC?nny0Nvi_8rK9YfkubnH^^ z#nZ36+R=roVaM&t@$n2=zp?zB$>yD1J+(PYWqq%#-Mp--x`TM1A>u75=SD+QAL2!A zI#1%a2b=wC-T+xKhn>Q6{wDHiuj*~>#gz~+F}RLEy}@=`C78n&KMrpL@7#u2XRB1h zH19y#K)$bFG479)=(Xb+G{(cWF+Bd_iK5>4tPf=;nl6g+d!!tlm312y4A?bsQj^MM z3n!Fk=3CzEl#y-Nt0WcT8RmDrY52?droGQeB6<5?ek57cS&e&16g7&)Y@Wuv4(J=_ zjTHBepCuscY;|wYi7S!p2_cf0@JMX*Hm9W%9NX8DD;h@HerE5r<71w$$sZYWEC!gxTg5`KocTZ3LF8Fq1i^D2;KepTzV z?x7^GwR?Rf zP@%WKr|mBCg_XyCY&XVT(pfk6yIThXISEQ}!>(B=v}9pJO3Yk7RKD3HcTIuJtR8en zDhwA;$L2S`D=O{arf;cTQp3BBHW4e(bsYyBa0g1(DSh|WTXBd(j zd@YYs3Rt;WrFWFoygpT;=O>^A6fyn{0UsoBlh{5ouuxrlF#gf@GiA%B*KX<)m-{UX z`w?9LqQ994<$Mlt3#MNq!)%o$xhqs-!W=4c;{s6^h6wAnb5JI!P{K(Lo z;BJSS?Xb1QyV0QM@U!y3xjvzLv(?XHd3;tSAe3cOzB-75!s^;E-l8 
zRN4_l8|COaE`8*C{L7(2BjP#_dJPRlVDEqu8BUmEqWi|myUfHFifh}o=Y$B9D{yD2W)4glj-u__Db+MmhUO?f(!|ntTe`&?)ae0Db zoE>Ath4ujnp~M9z0}d$TTzD-=Au03hPqEoM{mxBe>k#nLgZ&i&YInBu@!24ij_6p6 z3AW4!`1c_+&vC3=hU?ocp7fQVCbrH8yYO}Z3+b!T+lNvK29kRzd%msP@YUi6$!w-4w z(`JczEr~yM4e8Yio;9uYC~VYhyG!6CFTU}>nsHXTiJ+E&=9);br1L}#_5Ndbj@}bj zR!Z}kGU9?3-5g9S%FC^Xy2yR#*XG`l$67zJaa$&>SowUTdu=5YT9uV6@+Yu8>b!M- zrFW81kU4d>IHY2J>Z#y8e~jOT;A#Wt^C;I?AD1MM|FY$NT3xQiAWtVV^zgyE?C;#~ zY6|tWdW#q?1Z$+2hyVlOosWADc~2=OlJmEY1yXgA+O4`!Rje)ET?}>Ccb2N0XYOZ} zUeRtsL_5TOSAebzvR`(5B(%h7W^r2N{^_JKu zt!F|GO^`xtxP?}rl>mf#=EP%v+#c~sm@?j}jLn$4!78b&w=}CS6W|FU((e2FmVH6P zct#uf(YB!%_YDSvidZ{!`rI*T_|(p9#U`j(ay9x}Zh8$JJrZr5W3zclDzcrL{SeX;oi^45_ZPIf^75YXg_xeTE8g|<&t6(^XT8XK8i=4xu>`_2 zE<eRUxq-BtQc#2hHa6gzyU@YKWfmU5?91!DDge%-Mf}*v(hwo6U>}dpRd3<9fq7 zSF8y2u%tI{SveiJ33$uQ7;g&>aL&zqn(Ha5~*hbi%zmoknfzQPc9%)iVbh>vO= z;oM2cglqLAz=kkns^;qnWZq9+034|@2D5UfdqWf~748$6_qT@gZYEElKsY&{HZGAt zgNGZaqF4F`5_i^X-_K9OT9}tz0NtY9qg)uy}H)rh4GY zK+_2;D;{p}b$l#ROOScUzGy<^EAQ(3I}1?ymH48Vh>6WCgnynJ>fwCmS!13fn}+F< zSFBC5)d{%6N|C<*>1tKw-GYO~#MY?G(7pKWiik3RAiaJm2bTvhBk~4T0t;yo%XvAT z4|VDt-anD@ydIUEA{K@-z3pYGdYYz3w-%f7Ol@JPAaOqLOHrhSC!GEa%df<7CW?)6 z>Ux>iP)FGJB=lQToXyldiOUazqfTX==y>@@VK_G{iTm~oQHvO z@oR%=ccEx>zpE$t+l~}V65gb@uaPwLMrO3@beqTRjQEJ6eWb}3?Z7$p7a38GOUmi- zkBZA;{OuXmI?&!wUYA)|g{V`bbTa_t4jHhX<3R;n)$7H2c)FLVv4c@>-ou{b6)okb zYXh$`X5=0sN^3qe_6hWmc-{T(h7?#FzLg$BBQm3Il1JQOC@=^~v2<=ff-*j6LNxM~ z7;5}W9ujMB>-O1W)a=Gi0ZP(PEp47_Z#2&BU~fCckT?9556RL>Asx#gW7JMS`+}w~ z$|@l+r9x#v=0K80KQge>e1;!);T}*^3|k63uRf1{m)rOxH1mc3=;O+S%YytYq-LfY zb4mw|sg~AbiZ-s>AUf8;l7~AqouD$YN4r#WvWy8rri*_+>0SDkkei3ji(AaDx;?t> zW$v+Rjx|TyEaNRQCczWkd*zHHElmDzND9O~J4WS0soa1_WTCa`G!26^heE@vPr>;Zrz}#JX;L0)Tw(>G6=K+x# zkHcI=#u}g{^r~etx@#ahvsepr8-1D3&ZAu@j@`eF2u=1_l!xVl`7Z8N`?Okg*UKOL zkYx$W+cp6O^6GF&uUzx_@bYn9vAT{x z&=69bIjioTp83qxPo;(`GPuoHKexQyz@6n;e93cM3;EKoC_Ald#F_F#BR>kd08-q3 zh<-+n^O>+n4VK^j91UF$p9fLzJC?7V8>ePuW4PZ-e-Gik#r^R$1}+L=qUHT}b$%=g 
zUTZDOqH*0?9pASYF|L@K(}C6}Oi}o$u^9aoTeSoQ{bMAYi`fMq_8n!_?QLdw?O{;9 zx3aQ(r?a;g=p5!^{&JNnFfLOGB|`l<1haHQkzN}p*H>&OqukOS>B^WX048B+(TEGE zQ~&6l?#O;Gv?8rSj7seMjYXXB`SUL_Rc^?6m|K*)oKz9+;|lgVNs3i(kwA~+e4AJlFZWL6|A5vTbC%=<0X5Ng5^`Wq-(eAxf0A(Vs+>r zyELcNzm&8ftZ!2T;1pLEzl*gH>SpZmbx?l2VLnN1#DSZORc)izc{6cvd#k>0X=uaa zcKV9*xX-l-dt;2c=uFl|dZCe(wztf?e8n5a>F?d=C2cu7L^;CpA}@eTfC5t%o0u=b z4IVztjonuA3x49ZB3p>%nq}6aD6V??pEaID}<3kvv$so8>&$ zH3(=o1{!!h7!ZSioj75SV#p2cowFdV**W*&`We|saj>u>&E{^9{Wz-*5ytcOTBL&E zsW;uihV9W-3mbD4s8?I_u=)N-jcVThnLd={c@S--T>6O1j2%dDW_SNA@o7kBj`w;d zkbCM3!SQw+ow{ouhN)CZCRVc>NH{@7E9>FiJnh|68oqgM{Hm|g3a{Hw)E3^COk^y} zD-7~hu^kd{$cqLm`_Sa2{G!|^28nb8RiFy;F>0~NcD5RSwGYd!=`nQ;W+BWG$(@tj zT1w*MW@&ncZQHrFR^rcXR;(YjP}{ubSai&L2q_!&+DEZ`xpKD98x6Hrj{uX?5!mgBc7HSl zzIWZiC+#sOII?du1*&*pz7PKP*@#yW^m{Pper8}}w%NBqH*!+e){(?hOT*RVvBI&UxMcSQq0PF-yU<{bPv$uj+w!MP10dYhA# zc5m1Y_SbxGj$cYx4flaA+P|ot0HI(oWw(H@P~*0;+{%_%S`RH0{oAi1MuaRP{~WsI zYl{T>+R)Q1o2Vq$_N<_b#r$>1bIuqC&V!)n&a;?s1@TmarNP3&6ye2+aW~!65fxO? z0Hv--@e&I8;A7vjm|Z~6y=VJWVrjlB6cP>EQ|!_Qm%@UWB|NJXcEP*gtm~f+S`;2+ z3P7#*xS+O{qpSE0J`YIWI*QddN8mPWisY_dT3iuMt`5EDvcw3@2<8co$Hc9CtN(H< zr|C3q2a?Ewzp5{;gH2Xy^ZC%)a?+cXnAc2Vq84SJmFPiX%=$8Zf;PV55Y+baV)|1B zt7gNt72~&sR&A;hI}v=zzS9MthjFx;cd;V!7Hk)%(lgz2Msc~DBpLTU}K zb+-2c?<5U9b^3}N=lp{>pLK%iem4p@C(y*GUQPB#3M#vn91Z5Z+0#Z1b7*MAmegL|GA z-M;{Oq@ttbx0p+}A=WH`!ps*KuH^Qsh@d|!ON&2#w8!jc0hi> zG<=5eI&Yy#(JQ-Wo?t@=(qO=`NQ<+XfIHvJBFHsp3?vYHd%$6}MhXdKEyMCL2%?Fk z`w!zY)P>u=W=JTxN(Iy(X;$E+C3TjrJy{)K=uH`Ekl}Q!0ul(Kyy`t zP548-`8}PupWWHII#V_s!%>mh zyd&nT(%q;RbySxY62(U3v9vk<;*eyq*&ylq0J52-trrVnox&WY5H?!OX3&hR5V1Mg zSLnKE>Dh1YJNzuebKThR)a4g`b=E1O&5=7?R<9!-k0QZqah?^_sVu1drmI;l3<~v3 z^yO?M^=fqGH&?$~2`U9)%(yaxmaEYz+L8<}i^u&lf)2}cT-*)Rr#1k+=OrwW({Pda z0bv8j^1vUk86MAF%s`)x=6u*Ye9v<~%Y0W;*Y@Edw6o-ZvxP_Ymt`w(3(ngueNt!+ zVc3tkl}vi=_n7k&A0R9qxWj9ebc{?9a>*5&6y5;X z-Q1_mv+;|dnSc}v>dV<%&}R8Ubnlw)eiyf9F9V(|XU?QG9JJ`|DwDP2+G-MJ-s*Po z-dcn_nR7ZXR;QQ2E(R+(QtdyaaX?|8SNn!Hkxe9Ve>s?ZdFtYYXs$0Ekkxc?#s_O& 
z{%*ppv3|8#qAgwF@|W4i40hsSk?oh>I|)3xQ?Z1PUsS$@^oTDdcgKhCm6{BoUCz@a zNykIuIrgCsJf#Pv;Y(#`7hH;{JIenkW=gf5%2g;Tt7FEObnNEPhn&-$tLkxEm=KMRh+@ z&O5@1hVjnQ<$)7h7dZ>{R|!z+aj*5)K+8ru%#X?d$7jnhj(}_4GcbLOB3->_ZU|aH z257PaeJr-mTVl`MquF2GuJW!XEX=aL#=B}d?u4MCCqv~&H4|HmeoK4l&HZ;V2fHx( zn{pb{Gmhsr;!X#Z`J5-!*zsPpOD7-ODos9EeB48rx-qKYeQ)t`fw8x7=y*e?fc-_N z5Mmq_hqBh<@;DtP|<{m=USDxMVmgaVgcQ5fgDYQle<{P z*!ZV#OyBOX{@yEn`}3rlJGiwzmAg7Em|(96CkBnQB>>cdJ#mC=<9X*rv=8gf-L^Shqh|fJmCk9L7usr3+QH- zOZT@YA_GF7i0e!>9HqdnJ;QF?@^xJ@` zLy5cY*8{c1M9<5zy~*8`S=U_`)y2fU&ZFW6;tb~E=v=PmT;3|{yYz*9(R&Oe;4PBA zzrGUtx>u6EjrEL~)9JTMx9e{x9Qh*k3p06e=AA0vkS$Gd?F5rmO^xzto!|-5_iE4A zk?N-Ha-junMHD)N8#HH$FRomvFW^A0Btmgh&vaSwsQA3;MMV`O?TVIp{%h%{0(C#F z$MNeM1}NN}8CE@F2QBY5tBSAz=Fg}#E!3ak4^&?o!2P;P7%-;s^*!fa*cuu8-qZ!^hXD=E~Q-cdrKIDo@Sm%AXErA`S~mHs}Fj#nsW zenYAv*7~Zhce7QJ7@_7{15eBP*sgGw{4}HBrBm~haaBa3SlVopd}pEodcLy{tMi~_ z^Azjzjd+ys7Q6AZ)VsVMhEK-UYajdR*jUB-ys>emgZKB>&Z81>UN%thZPkZk4F=tp z;e=?Pcu&Ek^D{eLiS*>!_FI;^16Q9+qdfV>S5A?fQ+ecii{hzCw(nDl3VDf?)kW7` zL>6CW{M2`1Kd!vACo`vNESb0>NTVhznx3IjE8#y;m@>&iN978ciW%pI9fNAnf~zUP zZ=K!WV8FHW{rl0X1Pp&pRx73ZT+$j#)|QlZvw%0-;@(~V9hdF#HJ`ce4LO{yD2m|# z34Wl(+mg_C&6d=Qd)&*#Jz{t0DD>oJ3BjA#KrQE8Yjg`6b=uGWVk} zC)80j?qLQ_KB-1e+_3l6OvIw=g1&HA&1~N`v)ESp!BSr#v4%1$QixW%Dm+OoKUzw* zf|gG|qi4>lUg*AD72|lgEmG9)uaxT4_1Ml9AAklo!O7#-1DAl)V-UpSG_Y-w4 z=(dGEf9|=={WL9dJq1#m*=Y#6WOKR)LD+Jh`mI5h<-TYBQ2a!YopM|*&Kl3%DM5AT z^37TCiGt81x9J-S70;?C*osCO%#%C$F;|vEtC$GbQ8y6`BC_vOrQ`X%YfdzK#rgE5 zp*tb$uIJ07D+}#BG&x?{u%lxcg|_Zo8-Qa)WR0_pe_N_*P}-6sO{gl;AhRk7i@#)j zTiYreyCX<*-#V{MmHUPH)Q*gAVzq&LQQb{6BiSK`H--r+kT;L-Rgrcc?D3sMef*qO zVOaA9yHT1J&&K;``?HU>!ki@_*q&DDQJQT_UWH$GD`G-n>!CLQyZcYB z&7Teo%zFaCeQpSOo{g}uht7CY{czBH>$Lr$#D|AZ*iAMdTyF98a~@A`pT*izWA72h z8llvEk9VUBU(U-^1<|m38_b>lU{Oitj+bT}Q7o$YO07u%izzl1Q3-X%ZA18)YW^L` zH!5Dlxw!CnqmnZCJDd;`aoPj8*sH7>~D?p(APuS7!%0Yep8r@FF;o%f8k z;`U{Io`KwzkP<)cm$Oc%lFshBL#$G>rH({9#M;j~qnt}-Bf8yQ>lb7aa)eU0C=SuT zy?INQSqN3feR<&*KUdigQE$-Ws;z2nyz!lw?yHhmT~*BQAw5YJdZ!O#NIGXJBdBEY 
zYcz1T5>9ed%yPc0P5W_6O^ubfjA-YhMwxouKPn?huV|4L_zroAIM{@gs$?HU1`Qk&HWURxcH;39hbX2=2*S#gQ+C6l} z)O8m2``uTdMbJB7a_VJ|W=y`2JsPEy9W_+1+P^S-H+{UNnazqJ{j$eu)!bvN-mID8 zPV154bavv}zUPr2Duu%Kw+NZR!^Ie<1scR!QBs6?9A;2KbUrx@Ev=z`f=f zOH)$aQ_VkY6wu=VN#57pgDmFvWIJwwOkM%+m!2FPNjuXMT3pL&-zt5Y3w8SXg-llR zys6hAJF!7>lm1qx&a-tm(6*XI$azI6sR}LJ?~HTB3Ub^_H}xyjyEi-%Ii&J^AVEW8Ic4qEOjj4%q{rAsQCfNZ z+b;AQFz2jU*T4`zz-Efg)RNM5=m-SibT~Ctq%H)Ded{}aM%|tZbua{{nvS);2Ldk! zQqRrp)N&6tNqO@_@}c0_O2$3oIIc(4J|;CTEE%NeZM}0&?)PT)==CShR}a%x%zyGq zmYfr6`X!=jA%w_*7sVS0sdEI&q#L_d^R<-Q$b4d)3}Xn#H@}fvPwlk1XSoozNoeGz zj5W29e$b0B#smxL3xw&1wi3n;W-28PyVg)F z9OFie$QrY|%xyJ2cA%%_5Rmf=6CP)$+Z{!Gzv|qFqRNk@s`+eE%O>gP8msl|$I?f| ze1Vxhg}FM^Y(R(LN8mC={;e%=+W!1Ki`meo} zMIVmiBc$JKe8G*}c*l$g9Tq(qfq7DaFRM~(_)+abcI4Y&OZ4)`@kxAt@!2w3G%-Ln z52d6ZdegTqW9*xwUdr3>cQtWW8)xyC1!njLw)FBZ1+Em7ZIoJxDYfAEq73Tjy3iB8 zhx_lp)T$hObLkzv+V=1qE}@fODX~IO>eUWe)P%uUec9&DkPi}{AsXt;->o-WGiA{> zzHrVfH>dJIAsT77r@Hz{!NA1w*-EGnruyRrH6;B7-8r)-JTy5^HhI*MKVx#LYFe0L zbWY!WoUbcmUFUiK1&^Yl)A(f^%KHMv0n^B~9=XJ|LVOPGCnUj?=9tNDpRCBj+w)XW zTuy`PZhw;Rah7J=xVLDUDLDFm-_nx!nUv!H9#UN@BZ;ubKVWd9idDLqk3}S0(=r!= zG>>Xm>L%V^I*z$lho)xf6OU|STvlO=qdR*qAZQv>etS^Ou8vwu?-~++{bq(#j_Kz9 zK$3&;imN*juU_dQW6ddynw`s!7CbeVx8UyzkTFClhV}zyV_m z$f6|sz}fa)$9tbtD$#i7iZklT#ZZl&pK?lQ`^&-OsvYEJj^yvxWtS>+>yFwir8EP+ z_|11{aidcEQuO4!=lEPs-J{GIy_TYeS!c(V1yAx&5uzbEj+U^~viNOKuEX&y0~1$|2Z{JK`V3 z)Kpsl$^PSzw-aa6wdX8G)jn!EWIZ^i?(FaEoXWRb&aoCX`W2sUwytzvtW2$fv5HAD zqZE&dAcX+T?kn~sq_V~-x-*g1X#LW_oD`1Ps)-{RkMrA_7w>iPkBb(#oUy90)9>es zA5ed@xvi$)R{Br^Z$>o%8i!UfaUS3bw*2bov19v5*S&cv8OPrF?$~{Jn^K+=%)wo` zYLCi)Ti#^Z3@#|^{JB!Yetq>D>AuPR6@Efun-%Rfc*XnMqtFfY!RnLXs)_6jDqGpF z--*UHeJh3^kU9sj&plppXMYwkaiBUHS+c4lq$EA+q0>|3MzgXXBaDFI3*LFYvQbzF zU;MRp%{zJlmB$;gdfQN$T0V$9sr&psQSX||OzL3O?H%`OpAt^$XI;*b_;S7v>p>Q| zJ6XSa+qNSw((GE9e*M6ByxlDRM%~KJ-jG;)C_A-OKXFyC{<6(T_bL+L z6O1*`xIs%`f{trIfijc+8F}-sOa$OvPa_ea{=~to7sJlXgB7YOd>E}nzYk^mhbomc z`u_}s`|Dkf;LAWBaQx@gdvT{6@B#34n`aIQ;wq1Ut|Js|f879h^?$NCtZsutU*X(6 zw-iP~c!t7pV!l38`LN1*NZ3B 
zj6m(N32jck=n2+0O+c>Eo~9|+zi#nAH=E-M0OFsB{W>o)`y-g>8U51IcRu_Rk4y^x zh&}qxF>q2k4eZ!=8Tsk-t?`dPh`#PywkQTiC+pzg{r{Hnl11s?=Z2R@dDo57|G_b& zyMQ3%f+P198aJLvl|?g$f(^I`f|2eAo#A?=%JxTM)gPn!pU10g2jhF@jJembr^l`o zw_GR(2ilJB`Jk(`{~kDheJcNID(EVv&*SjAuO`yXFr$-ytUdJK3HqPO^dDb||3CKLx~mF7FK`>BCK|)Xo36&C%&@CX{&7wOM32ByY zML|HMq!FaMO9YGVhDAw8clU2z;NEiI&+{C|`vKm6JXCb8E6#Jyd5$^e7&gcb9#?IV z|3AJWh82k`W4gwP|LePi9_lCvNJ}?n$HxE12mg5#LMT{N5X%~+|1XW78boF{LSIrFuqc_TAr)v^}W;W{H*9o!6j|^C(JzT{M95_cBF)BmnUeda*&AUXYp- zCa&gr>k20Bp?{3U(CS1NIZWP8QXk0L{ki0r66}X8@~Eoko&UW||6Q;g=U;WuA)TFU zXsi|0q}=`2Y>%A4qGU3`hyLnN4>)SoI!a^t5=o`Wpph{vm>ZK0zoq{Yw;Px%7K5(q z=`yF+N+z)2cah};B>pED)FexC-rV&`8a%|BQlP?LoNFFlzZ2`aU+aM~xApX}>+jGSm!? zq=v`{_OE~4$AD$=(qH|fneTu3@SQ@0O$%lVFhjjp86*O}66Mn%=@a8zYo~K4&PQ>t{0;UR$oX!+j4-Tk+Sn(j{9xkOpVx+w~BbR98 z?+<$G2~pv0v+92=2rv3apaHN+)dhs6*i6Oe4d9MagH-CnmVbv4R?}vE#lO)FA=ns6 z;6^owth@c7tHUa6uB&bBFd>@JZ}oz=wY{*2Sb@ zfeqgC`JXBGE-Q6vK9Pd%Th$CSEEvuCx-VkMVF5up=M>TPZXkWWRhvP~gTOwz7l?(S zq5qn>G5EnVUAmJ?a%2S%KEi^7N&%vb0g|}${KUkHLdY;}B`!LJxM*joj^Yfj6AVag zmY$*lN>hpjo6n(oRE#nb@TLxxss9PbNwjEKbsgY4V-BQ=nmn@#kK^`Llnw?q=Z!(C z8qGvB0VRwKXRKxq6BH-~&<>I2GHH%sGG}Z!3>Go=$j8oQ8y@wNO2S8jn#;k0f-$y0 z>n7+WS^!-s7s?G~;cXLatjE|$9;L1qfEjys?;EoZL8oH|h(?W)2aXs>v{Fq1wb=sLg0ymSN{P2GH`R&MgwGeVY7~7 z_CZ^HLuU>t-<;|t z2=fxktKh;|j41ek49KzU-^|Gaw0 za3y3k0n{6+obs#$~!sVCVQRa>Oh@B7*Uqt?2rliCkZRGWSWoevCOe<|1A{bF(>d|z_r9w zNFfMFm;+~dCusN}N}UKX{-g*`?T)MH0%M4)|=)#|5c(A62aXc=H?JBI&x2t~)mfTG|+B=$axu z;{NSp9zuZ?fQRTpCWvE!bn2E3mcMO@*pnehN{} zh3P!bpd#@bid%YrpP{<@^vjD-ctmI{Bv(P2*;;=``V^F(C7;KgCFfxHb5an}hwJp= ze|l)&`MZcYgUwnzdI1AO&{^oJuPSirL_T{Agaqp>;&Z^W^Q{=%TjP%QN(9h8^oI=0 zX79U&!I=ojTZb=O2l2fi2)lVCFTuLs0N2228TO=TTR~01x@|aWUDjjVh_`9ubOym2@50ByH4vP$69McHf=WPH=~wGJ|gg zjNw~4Wu`;Z^7B`OOWX^rP2i8PHJ(g5ydE?;oXk-AxPkpgP6#oCXk=u#0At06U}`R$ z(74LxhY7iafo|k;GZ4kGUDR0+PFW*KlxI}vJ%8rofq)y_GrAIaG0qDharRF>f|`E= z@)qdKS=qzbgz=iTV^+|VP}u`UIB%1q5&jo&imgETr`|%yRFqsxwzhugx5la+at87} zCs2AsP16IreMEiJ(He@i)73zbummH70_dt69hgjr(?8mpOvHd%lmXmD&fu$7-w9V9 
zfIV9+1H%pr;fHULSWL-$U{~K}2FOHgznsz>e|DbOP6qWk+W*OIZukslRAS zE8vyQDfrBQp1Ipk$VHuJ?=A>UgIUX3klNwWMw5fuC29osegBcSLn9dj7KWXmO;m2$wNlHz^W$Pv;vuzfF$kN5XH~;Z`f=PhbwjB8=hhN_aGwF zPXNLFTOu>UAgvrmh(JiLg8|aR%$*bgIrM}D{F?&kq-~2Z%)XNML9D)l%!xG)<5~>j zNqXZ|Q^JvR0YhO09Dyx?khspDnLmqoRUoZjJ{b9_?I$jPsH=v}Yd2*8c0jrCwg%WX zfoO+;iq#OI=LU0z9v%RMW+8oKlh#!L8M%fvY0fCrTUoyeOkVN$j4+%_Opi%iQo1II0;uqE$sk)mml z(>d*Rr9U4`BWsBT3di0}seFdD#DjT|l{x~=3xQ|c(W%%AqN{4yV0{3~!2Mj3~uumg91mi1t-_G*Y!@3_I2TW{001wIgxU5YN zfFJ6Hmg4I9o**4*21WnVglt6P%zY2;(%>Hp6*&9AGBXzqJaVc}az-OgBTyrnAnoqT zVVG;pMyQ>T{tuVM!-NVbA1ug=?7*ZploA=!+36<(=AX;I!6oS$gsS}Co*ai~DhO2< z`JM948JRx16GWn{`UP=!;)77I`(zIY@V)0J5OD)2syo$;Da#cAJXn}_A8SoG+*GpW zu+Ge9U*!n?DNgKe9BfbUurPsHfYp=YS8jvA9;Cs)`opqD@Y+i%Anr^n_*{NeaokM> zz!eofn5LZpN?rOY%6R`Erc6mfQi-H|WKZ^QkcnBthKkt`s50S*JWgmN-^d&GofzNL zv>frnrND&~tZv5G)_)OMuSU3;B~wfwEI%uG;ve5M2#Ij}kBED4k3=pKgrV-rga-dY z=jVpOYL!4)!Z5@r72W~iWxMM!1ghr*He!%Y=9;sBD)u}uXTS*ByQCX z0=%pe0cP-R6b=LG!yJeSSE>1X|Aa(?3l6RL(QEG@dkY9`2x3 z#u%!esnGtLPx)cM5d%gzv*4zYB161(P)O1&!mA>3$Lsm`i5^2cUAPDm(=sFrz=PW8 zw*p0K!0l=wAHH+a)6s_P_tL}0=!QdqjY;Z9LBW-f!$Jg#T0lN#U$y)Tp+ccYa`C2) z$GjYm^O}WqbAhEsIImp_rET*)JM*s~7Y-%qaoo#83O*)7q9m);%1sul&A*q>+pF+W z#CX;7NdCPrKu!WrXcT9Gm1BMV9*iLX(fJT(3Xy zcALkWrDaBb2hk2Br-b>PwfqyNVgJAzhME=#<%NZ+7TG^$UQ$?P!6y5dllyTWvX0Q6!Us?_6y@s^f3#Gt{Ej$DLBD~2Od<1Lr$e(z9%h@ zB0XTMt{^NQbqYiIhrOrQw^2nBFLmX_~z00juRa(;b5z#tOdFwoQu(IUGSHDNy zc^qph^e_lW5{=rD3aoAj5)TzEGTilF_njnxhGw)HOr_0&S6KDTKAaimtanxB5p`S| zS@Gjz)t$n*AT+kw0B2FQgr*oMPCo~R$0P1^a0fP^HyuetMsNf&lhs6n1lz`dA4Ksd z33_KrxM29dEqxzK9if=ZSRnnE=W~g@^5p4@Z+jwIeUZ5oE-<10x4@M};saT@a zV87PDddtx}zf-*qP=UvMusNp(PJ*<5lJs$$ObCVc<{(UI4Y-TI1Y0FPIM;Xd=%~Ly zig4LnB#Epw=Nz%R=(rlgK`Txln@7t8RD-3$IddA$gZJNng%qXHDzN9$t$%@tmnC@8 zWr6!ZR#>Ko+$a!r& z4<)=V@LeO%T)KVr{A{BC!By5E?VtJ)kt5D&DT+eJBasLMbyJ|}PbhZHk&ws&gOtDb z_&eqpv^h!r9K`jmL&B*`gX>LweJkIK%e{B#`m}4g6)G`4?*8h;Pnpa)|C(ZNUH z1lUrFW59LK68hXcvx5+z43LN*)4%~JO+=@mE^>G1UIg0@uL>&rBi{b%3St*&Y*U&d 
zns1|7#R8~jf*pX?pP@HjUJE^NJ^TU_awHcd86WQtT0-*vW317bJUVCuEs|{wmoU8I zM;6=$#Inl{oWTbPG)Wg;4yVhbBc8`-ljEzER;r*pWQu5we-k6Pix5aGz-ZU3ePjV( z$i=dpE>(RWoM2<*mCE{IL9v=B!9?B;>mMo~)56+4RA+ho&=Agm=spVZhl?B*$mzK+-urt)2v(UuImWvbVnRi>~GY3h3SNO|L5c3abHE^;h+v%y`@wOzVMs2D_|Ek57tMfK@$bMTM0EVj!h4y`alw64 znB2VbD)JoFH1f0KSfZC7|1AZs0*F%UO>U=mCkHIw&oMfgg6lY>t#&U6?dm-5W^n&} z*2fAk0Ww203fVV}KLF*2un@j3n?R8zeKhd6*zZWryV#AZR}xGPc@IBYg_k)+Lh$hb}1J31ta2 zzKrVyLv_#q9<9N8%3`R4|D^=pjC4=VB8}`O5I4pi%@WL{a_40=k)-9d0 zNYXxz{q-QbATYvLBQg#YqX>jh?`Kmwh6fLNiWDAHf0bwFA3$OBN{mnq)D_FB>;p^? zz`YPX=S9+kf%Rj@o#EkF!nyofzy@sRhqvT!qnHq=CIzuEwY!X~{>Q>c_5oV{BeC4_ z-h!uKH_`_K!_oqL+Xps8r0S!a#r^5TGy6~8CVb5ZCZ6}8INz(P0kViD7 zssq-A_5fZO?d?LDE!?7N7^EcW&;Xu|pBPr<4(tboI`B@@y+v_WJ$w24&A@vmX1WQd zWaLPMpI%9fCx)BR;r;gD;3&(B1|G>3qGshs&Y;wbV<|au@lluLy}#2CpD@KZW!x-(FmS#6X6mU8rV$U9*8E0|v^Z zfUc5*AJfPNK~7Fx%4%l#v#la55@DJ|=~7Ln{u z-75zxrIrA}>xto>w;_zz^~NAzI6#`YtW}5W`8DFj{Oa2SxHQ7BMo9*H_^zj(Vu}TZQeYXlqG!l1|Hz|%S_~dx|jgVflY8X z+*N7CCn!KVh>JR7e8@i?*p<`BhGQYair16)L0(UUuXt~3h{TKMA1#1s3<43Gkjorw zhYP?0;PdzN5&2U|Jw*;x$O$O21zO@PGZ1%IzDz@u5nzwRJ$OE=R~DePwH31lW`AKl zKau_ixj@rzA)CKZ9fwtW@QoHrbf%d+A4c`av_hr_A+D-Uyh?4Q-}-@gk)>9pin`=2>T8QA!J zwrx@ckngbnFrwO{D*(w>WOuMaL2566InR#4dj?=P0lScp!M_4AI7b3UN*QnJwYR9q zsu#*IwoQqTX#A7H7#MHg6Z?ONIM1*JpxYI8$^aoM05EtdFC0XJ;Bz21+{F%7)q7n+ zU;yM^KQjE(KRfoneitYMA!#$M=O8!1Gy1O|LoTC&jkPSK{~t}`BM9g*G-46T8qKNk zC)ghSFGji_`f_lX#Q7X#L*`b;{q?N z&bT4?=wJTFbuJ-62$%f-UljkJ76mEGU_$qQY>%sQ2c$nBOm@Q&!PI?cgxb=Fvoi~c zhObNShyRJ~7noqhe8BZJ`V&ZLWX;e(scZDTGmJ;UZX_8eg>*#`f=~}63JR(5qh7!9 z7l2)tY>@m17(a&on}ETipW{_JY73J3EHImL;M{lpr}M)@wVF&%?a?mmVoY52=hm!wC!Lj*Rrs zy|@}XM)*u1GD3+HT@_g-atzH&_7or?mLTztVysf_b67C5ZOcD%efyPA4R#Jr4(J{1 z2U}ncY)=*Mdn}KNC*?|E+mH?26?Di|SA7nRg3H2d2tT4)P6w7p_2V<7Z21+S)PS7( z5n5$0pnOIM_XN>oh##9b(@ml%-e0{Ye>RQz^qpJ!C56u6Jwo#}d?Rbd(_2}p^StAO(cE4leHLFk3 zdfE&K9z{jZjD%QEUz)k4&C`h_nhVTX&G!j<5Ud0GriCF|PBl=Hu_L-QQf0kg<$X16 zYOb9c7@4vvdDa22(>j(<1iP^>Ke%gw5)6jrrr)VWY{+W2ka}6FICIqr@jkIYN>Rx4 
zm8l-_gh@}(U@6rPc0HF;VmxH7A&;Wn7=zcI02h`I=D4Z7L_>GW*L+PQ_(6(S^tBj{LheDH{8`F z&}>*ei@T~hpK*No*HpuA>fRsfzrvzXkOu{T67j31Ca`a{0ktz8ytJ!JTvrqPR9|`q-0$WMD zfOTXmG6e-CK%=Dq`tlmUXG#M!t^jlsxYZMp-aLf4aJMw`9xyP&Kp)5ym;&<@EGjAV z__BR2%MEVjpWzWzH!ujfgPu>%Yx<}O40-WQb~3`JFuIELIJm%$#`#yM@eCvyb)C?x zQ}_lqUgd$eFa}r_R0XPyZ5=1`$B~W+^iNvBf7cvwzB1rPfTZ%G4_zs4glv$6KGaRq zHo;3bvo2c=W~3n<=Z~xH1abE1fwIzcXrX%2?B1VQzjT6xUxPZ;E7F>8*yV*up-fzUii)Rp7aVNFjDo!kA}9>6{r%d_3$l zBq{m~cp^erQJe03k+xFY2Le(-`Eys<7GMo-6FlCM^mmu_uYz`{4)n4$nk@vSGBXjW zoP^~&k(sGlk!=&2s7QX>jF_{3ZJlOW9oILZ`^)H26g8GYLqLDclI*(Q;c5Cf8hR2HM=Bw*6YK}XPRM_fGXsEmEVO@*ky|iTI z#LU%i+tv;wxKNwp;}d79-cPe;5Vt2lE97shA2^i?8KUk3tEC<`wWtb$bV0M1rOhE* zBh>rv*9!~>U{WDTD$r1i#l=nJ9%x`1hU$;9&OKmJ4_fvG!!^lmlunACr3)aU@<3XU zNnnu@WcS+aL+LQ}xLuzt;w@X@-aaTK;vq zE_{-6{oy2UJ-(T7;$h_gDv5pU4z}cZRr>y z-UB5=9x#?sIEl=%e$pLdSgZo+uUpMIW?GV|qI7@Yj99$B7K|58$%87RHpqomZx4id z>Yf`MIkB%ok;$6k0J&%zI=$=&qvS^D%?*G&kHz3Nr-|VzE&tX;93uKBRV7kZ0fcBh zZ;VFfqzH(ALLVu?6&#e@#b&~-*k3o<@y&w=_)q6jvfN#yGgw}#2aT-4mN?OEVy?~h z%#lQ@;TM50jTcTJ66R#a#R64rv5Z`styAB)Q+moCz%t)a*K+%+{pnn-lk5M!OLZ4g zxMNxnhADq%Gc+7qV*EzTdJhO222XNETJt~da)v(n2rIdB@kh6=>tlCW*3b{TpNTDX znQJ=Wjw$s0coaR5{5xoB)} zTHc$IIKu!bu0?NtVE5YBGA4FIkAyg1edtq9t}zDqvpeNT!w8y~&vMpUf87;a$hvAq z3$rtntmFr&@GYsVhe}~bY2A-WG2#7fgm+7%+Aq)t4Z%-SD!f>c9~tM31cd2d0qR;~ z+#xcZ2H{D7^V!lfveZ5(TkSz;?d8dEf_`a55ADf|;&Opzy3qSrDpVtlbL;ws(SCIY zTJ_#?D5;JQ>o2EU%&DO`u33{1v_j2Wlma!Ues!bKkq~w!(rl?4lly*y>5 zC%%=~P7YU@GOoO5c%L44NjzQ7d<=N>O^zY9QE5-ywJmh0qX_C)%D!3C_6GV#~FPv>txdJrx%^fE*QoIe-<9Va`guN- zzX)`hA3fD;^zPucNk_E^+TQ)rPMg2lFC70xjkEh{w%1%r+K*VY#S_LYsgGasy-B`H zTWgT*plXG-p&Fm7q7{>2`=Q;qEb&2ok5|IEIBsYA8|lGuZ0Z3`Bh5`y{E+2d+h=KC zhB(NJ<()slnC+f+M3ll5ON={otc*))yWQ<5wDO4~JxJpilwP*FDdsTjO2~UXBR^*ka`)#w;x+jP^`(9$HvpN(b z$nY6@Q32&+>SZMWlonKveKj(BONSD|7rzl}*G?MGR=bW{cv8C6FUQ4eO~OSkw~ZJFLcLwDYsxPS_|;29T+ zLIw26Hde!yie5xzNamKp8(-{r3-fO!JWbX~a#2-)C5+T7AC!z<%rXAC{W>lm@>4`R zKtpXv#tV!*C!TBHBe+3h4--D&R$tyQ)rLgK&}i^DrCz(J@Fw6s&I*9AX$&bM%Yo9m 
zFyyhsQ$j*4Iern)zjLlPiT+Yw|Y-S-Ze^UO_eTq_+e1?vZnKr|}qw3!-Z8r75-1oL&|~^Ny9DzA^Qabk&ifKN+njNG82Fd__YGO2R>6 zD>CJCc2@DkHdeBF?jLwv-QuN*b$KG8FY!1!gIR;n89I6k<2Jm4D3E@WG{uG!<6JYr za7j!?)Yp8~_AlcIgPAOkq`s5Kw^7ViOUuL*NEey3Q%~-gQ1H^XY!)=i*otZt)VWnRulLluF1SG>!g zSc&r-|(Yw!g`*DQL@r4zp?7b3~4Mwu=v*}%{+ z(f8RiPdWT#8#H4oh@yDQ)8%?5vX_xs)zFHjV6XWrN(Y%6q+oyK{4VRzG zNDnMs_#{LLg|SnoeMc+Srpx@)C+%^o=eXyji+w2=9YQo;8bE`D(cy!@*vWp$V()o^ zjRZfLw(^X12l43Q%|Iu>tz)ev>J8`(d5eD5LpAwKo^%K!B zx<}_&T9GT09E(Y8E2G1r@HdR*`qHfJb%iBYbx7f znJ~4O+;n}nLAkwDyR7h=JmsY82jXk1YrV-qO-&Y6(VIU=WY1M(HR;9^BDQqPPHdBX z_oMZJ;c%;;^1MC0+?Dw@^{osk5Nvr2qe%)H{F~G3LaHtE&q1uWaMB0MxP|t0?=aDG1&=M;VVwNMn7t3`Gh$xOZ?y!fS0275t)O{Cp*xF3!6T6A@8YC zn0!=`G;6{NeLPddc!+qx>D3Z82`6AoZk?DF23YF6Nq3zORZqubRmh{`^-7lOoP877 z1Sdm2`cuRG$IWu|We`jS8H5)rr)brj!H}#xh2cFhlHQRy;0j9?`~S?fN^R%QL5V*X z;TIovDyEk3w{$3eJEE+lP?zla!D$A&%&N6blWH+`Cf8{J{35Q{Xqb8TDn4wn@56at zJzM0VT#Hji$30>d=ktl;dwg8YbpFNSmdnOmI~n5CY`ZLWelvF(B)^UuN9^Khyrk;U zz5TNdWi{QrJ_c3e`OkW1pJ zpLF=LTEe?MUH%*jP;zqV1EeF+30q;9;QOq9`f@l>wr;xpkP2=uNs0TpXtt&55HOX| zpeYeS1q|gVZ9o1T0*ZG%Wvz#d47~Pt9yYo!Nmj$ss$$VAjdw=gRHIa^34HW2cD}s` z9Ne9A_*~cr;sEjwIdRfcBP!=QKYg+}w?5JG?Hn_C=dln0S_@?<8I^g8^}nv#Qg@7O z0Mc+aGP>^cYMAX88ijMSS+ND0Fqf&Dt1s1M->xwM6qZw{0i4nXoJNa@<`3w)BHJ4R zo-W-&616RLadU8j{D!Kz&*SAJ%r=gC6BiE?mf%*?Q9;ASD${wlYHiO5jXUwboNEMpViwoio4lRXcFg0_R?7z= zn=8q(SSddIrqH<(pp=*)A!nX&A_#rfqZ?19{TG6jm|oq=8eXaiJE_U8Ll^vmxz|8#r;>4JTB)@B^FCrn z%SKCP?3Wy$>>*!KTuL7<-MCpSk6-fGhqgx#R}HU6mGs#l~3 z?XywFm}BpjeojS8KU?XFf3zsb8v@W!^tLMfr!&qc7#Oyz1osf8lQ&Mm7C?k7H`A%E zLpjRtUz+UlEw%Jh+z)0}yI1qKoT251t&CZCkHh+Q{K4vNgj5z~zQ1o~=B%1rCA0?3 z@aZSdJwubKY#5*AgB)}6Jmo%|!f>tBtL_Wh2Zfh3x0buR4^G#eAtg}_67WVm%c<|E z2t4J^3M|It&O%jwO4AKt=8}2#rB5Lc`7x}YSS5Ezl{u8Z07u1}bhOuPdsfQ9JlP>u z!pD%D?H*6wGUMtBDtztdT^1I((DM9Wne&n;&0fIMZ=6ua)#!&7EAxydeRRJdoU~j& z&mfQiI(2#-17^LhpO-3dY$B>1-F$VFO75U%nD}pU5FGTl9FF67w%4_eU0FLZUj~F( zHQW#dA;m)S8$mDj7&qdHe#QvdcUMRk7jp2JGjKiw$-fznrxJ`_b#>Fu9ljLYR_Li9 
zn-9do&mE_RMXneEYK?~~gJo2*E0C77uBUizEo*gH+MzgS7Azd_P<>-Htl~5p6tbBX z{ZcX1hPuM@B=P6vWTv4r%l_!rs=f25zo;Zx2kh#%r4`Y8EA&r_l}_NQb{t#J-b~ zG%j8i1OqGmf#kGlAcVMw=DGhZxXG@Bf}yF8=3Qr&3PM*g13=y+L+OXQRvd;*9o#Hd zH^`kpwClcD2Zj1Q`Z7NWIooM>MVf6mjnKAkeI}=Rl$R; zvP1-?e+Y4Tr%0DGNu^+KaxM+zaRyfgG<08(?A$pQRshGSEq9o;TyAUqvY54nAE{M$ z06lF@F<9(6Ep)ACu!h}m;E%V(0xezL7S=s(i}=PeI|pyKDjX7=9B-vNO>DsihSOKx zqSV|A4%ln59v)7dQ7~=)fuDjPDNJdfo-nwa`%E5qrGBIi9?irem{Q9OP>me20@?y{ zs@op0kMGuLKZbr>m>A_z8-z+3vgor{{AN?qAux6Sl@pbU4b3+_+#JJ<_A6U*>355Z zHH(^Y2>aDjyY9kW;u9gR*1I&RJ*-wY47*|cZEW8V6DyFqvZpBP4AA?^B--{C>(sDe z0dL!~`D$tKa^A}MjQT;R#V(K0l1EaE=|X@(d&LDp7;u=HkS zx4GzUk+Q&77#ym~@Wz>ziRnGjQ`>?{X@z+sH z@qrZ>4pIkK4Hu_(Nw<#2lFLIrjz|=yoNR6SR=!fXTJl0(%R(C{JhWIrXEqa9k+ zDf>yTCL8y{lO(=5hKBIk0^kgy4H+As%0YVQn&tDktIgY2HJavO_FVOrl>^IUqbv`| z>B!Dcz&LSNB8_)#kak)>5a5ClW-73dM-@(w$y=wpblF4&*U7Q%-in%!4@!|Uka#j`BMrL`GBQi^DGsV&>*z0!t2kttKUGL z^b6_{oBA*1mupu`&%~=RDb*-D6l}(uk*z8xLfdBQE^8kCK{yr~0`4!t`z_2>IGo;w z17eR(l&BPdTP%BNH`lVNWVp~&4hC+-xsL06=$cW9dL%@2>ft^ae9<7+)9aM)|ZoyQ}jZ4iL2M#?t-`744nBh886|)+1-+I%zVPK zB-=D2;JP(h!c>Qgr?|Du9wR21l2rk`eIuA2T}hWEe|f<1ah~HrGznZ}H5dg}Wajl< zt0w0`XTT6zneqFHI^F|uJ++9Ku(Lb3Ot52O-P~AQx;m#F)zeOBzI?I3cETpodKj>q zTW3CMc*T~)iMJaYv^&+ONeLUuElj)0mr)W2Qe_O0Tj7O|c4u!pBOE%Kw-1i5hT-2Bk9gus z!u|?1*ViPM%)>x(7{Tc0}l0ZPBH_N?Xpdh6Mp$BW(D!_wQn2oZ248)2TS~;Y zWY%x)DNRD#Rr4WDrb-%nz~%dhSOAzU){cFKus%K+>4i9VP$EMtig`U-=ZOicslEhc z)M4WhUgSV;B(6Z`)w*}aFa%X#`~6W{q_}b=UPr8c&`f7XLXME6a;VqAU;WPx9Wv6kp=6b~pFjeWFg;CC)ZI zG$_$O(B3Z--PSw#vwUy5JOL~dvLaF9b$e@trc-FSOPW=`#g>{6Utr@l^%&?}c_&R)|bKPo-o=fQcFu<)v1X)8{ zqf1It|CZUUy$?ywJ#e>eK$68zXLa=$*jJ&JGjrk3%|Jjbk=Eci_7^uI4CGOc6UWVaO9V9?$~qc9UZUp zFYH(mYQAO1nWe#A-Teq}6m&8ega%B~B+RuUk2wQ>bAtc&h6h}RE)wN&EPgytS2 z6$)W~q|V7}6hGC>Fu4!_QiQk94Rk)vy7=Srit{$TRlx~%E`TO#^W#Bowa6T zL85KK%{24Z9%dikydIhYyUzlld}xDCVNIuPcq57~fnHcVh9WTQsIv8y?6a)$$2d#> zu1By4S^Bv5{9ji%=Pqj&DTGtFEd@k>ED}LRSBTL~FsXg@g+gH+-{5BBN<5oua#olO z{EJCTYiEt0Yu$vKwa`7=$*aFzM;z?!|IvNj!JYnn|AQoWjNu$TDO3c+QWY2HZ^7`6 
zLAq!;wAv-6Z){-(EE4bSY4FWNw}nM!x~W%{M>mR07p!2$^Ez3g${*bQvi^&^yg0QO zci5FzfcnRHbk4QfG*}6S!WIRxgok*Km?3<01f~@wK>tNXwBJiiLQg*b zVLBn*)dVxg8VN1=rxTfOtEn$ADHgAU+(+kq+>`W)h2I$YW~zJ|CYj8Q-n9r~lK*&) zksxmuBq}8RnSKJ>KT}a#?E^Hqw;eHhwCS1 zq=&8eG5!72kZh0s6YagFkmZ#VO53Dkw1xJg+0K&^+F>Bj896>-8LYDPBzjvj-b_VR z_r=Qo-r)WX77A`0z0q0c!R>(+5Dp3nwk%kLKWIgr;Ir`!<~O>y{dp-UlUYioC2dbm zjcihd7j^wt&}?4R>uumSI8DMRRz#80=fNg`w(_7sNGkpQj`q)Y+K@3jcc`QBtSSwH z4e73z!-oX6D}I`Eop;vWsy7x;P^FPa2TEaeSya7$&wgEo#d+)M-lTW9yh5Z^lG=8n z4GJEtbi9u~8v|iL(2tRs{$Gwh>wYdo>6l54Kpzfr z;kdK3CM4*v>L1(5GOj5%qph&rXfy>-lE?)PnLJBlqNE~BBR-{v~GeSlDX^ePD0{4 zyLfN=1)Ga5hHmmePkQo}p5o({xYI9!* z)mv;n#3+s<_fxnc;m{N+Y^f7@t^LNiTRu4S{c$|u>{by{N*d7x&-ijI7lg9u%- z{2r0atp~MlW3O3V-mNR&k!bnVy1IBySMqc*q;Jw-Z-L5u(zq`pgD_H zlEMAGbw{(<4GNXDOH8fD?Q%{}K~&$4`1NFI)Lq)jP-~?fg5$Jjxv-M<{O%S{y4iMB zh_|Dt9;;tbF|{d%R%#^7JR>wgCOT^0kNCMJ#24)`?x_*?{G9dr#nsAverlLpsmg@* zb&^Y?^{HF)F17U=+reMmShPdDqd2dc45EGgM~ zL!=bDtDk+0ui}H8*jvI?6%orX=gw7QV!AtP^=4Xs=HjfIACwRw2yU3Z{25|CH%xK1 zRg2u=-kTlVOZVrm9ITdq#F3LoOy1)YEyBgiQ%;59FZ_8Sa}c_Pv3aFEf|WkZGR}0{ ziMcFoCNiH`_cx>Wi~VuX5`O0?;;hx2;zYQT<;F%HY>$-Pl^|-**<*Oy|IrlDt?`+; zE>F}9(?JHvc+z4%N^d-ziW6eOGZqKS9gDMl;~5w` z6h-grch#M{qBQjxJR^H~WzOGBleaE3L}+vceniweB`DgYPHCD{j9cYi4_&7vH{Y}K zAcrW25wXkT#(F->+=Knn2Z)4esRc%pjQHm+-K$knR*O#sicuJE2;Q8%S)|WaW-g=D zbnW%E(3NSISkav)yRT(jVokRzGy8xN%Uf`n&nQ^l&?Sgq=kmUXPtfejw)@_=JFD#c zcap{4$-F(SnL>c>xN_);uXE*&4C0RM=Jub(Hz>T(X_7}#9re`JcyosW1_0$p4B4F3 z=p^)m_Ak$0BB~?VrQ-gawG{)pJ$nZ;lwo-hV`RKnnhUSU;*-4x5pt&u>ii?i!kCfD zoCN*!G8a|+Y{<23&&2eE@A|1Q`dNzUO^p2N&wBN`r#a_y`&?d(m~pekzG{dMjf}sW z?(N{}Z<`SKu9PiD&Rc9h>Ji*vT-p}yeOsfRBgek!Xd0C0tmmwEyS!+wR4vE<{)^J8 zt1tY$K89bL@)KpBR?j(KPpLveWWVP0dhG}HM}?sEn@f+XzI*vg$5=RmdRHru;w0yHghkF68Ci-_2d+2u@}|W`oqA9k z0Z1&v59cR$@u>xk(a*)J>Ym-*j3B)gYExK8vztyiJssmNz0Bh{BgwTt;N4W?Uor&^ z086(DF;xmvF+Df08qF;z@%@GK_|k^mUuiM-9LwB!qGBKS(PlMngQxsM&KnP(43<|t9J;pFL7=)m-M-s)}G zL@7E-K6@8`I+@dbW zdcFr|XNooh;@c(+)_vDr>qmmU&7`s5wi?n^(CGB{8I?I7IAzaRJISML9kFi}!gQ|y 
zssH<_64rJ#dh(nr?m6yT6l&KN(#v)Yj;#5HE#1W+3Yl&SCH=16wdZOFdlzS^N1IFq z==}r(miPNon`#z$n5=pw$K=-sW0hSm{oQ0L>{{KL5dU7Aes7?2@zNdN+)8rukMFw$ ze&Ly}v$rs~HL~$?)8z|n58cSG{+_+*Mla-1arKm>u$jNbg%42z7lL=sr)#>8#?2Vi zT)$Db{MCp$hKV{g3DuJXVkvqy=mY2BplM%Q8h?qx#%mOd&9Cm}2!pSM>zd!AD2WY3 z?I>(J7h`WsWWqJ9K6h2-v7F#=%MT0M^a3h2<|$FW;*RkAMCSa5qXqLEUF+T116!Si zq?~0Vt33v8KGnDG*uPO89Fc286qrUv>`L^xOm*qPUGlTPoTN~+GEe>dW>5fH>>?i} zYMymbaxQ{Cz+4H+pW?H{>&smun7*2(#XIh)*INB-*AsI4;N=!1RLJ>QMZfFbC9LuwTmg=zbL4U`yT-wdIFTh`6 z1LWznbri9|A$S8Z?c7)yRy1}q_v zGUflZ_myE)rfs{*prVW-f`}q9ARrA6p@4uvm(mRiD2*T`>B2E+krX79MnV_es7Q!N zvs|PING?!Pa_#H(eH}q(AN$unzHfi~onIWt^UVF+aoty(=Xqt@B0QAD3$sT{nK+x_ zb3sgFAV<(vFOwU^Fatc?diD=CX&V-@!-plId_bq`{-A9|tdI80BAIW*j_#xoU8{ij z-VYXOeF7zVC2E zJ_T5Ww4}8)Q?HC!uaSU|dnPKD6KfN1*WYCj`8-tdILBN=ax`Q)=rWUs zUe|J19{JRVsm8N6w{0g0<9-If>NFd*Cu`~WyAP>D1ogTP-8=d^bWWJuAO$K#e}D!7 zUvt2t9ph>=Z{@zc*Y%W+ACXfP0Ny9HN3lAHijng-HsLXwB@^$*uto47y3VtqK(hRdhYSZB>;-i_8L(Vw!)i#@Vz zEm~Q(a2@nZQi>~wRt}flVP+(}kFrircz4Ao-~4{RB)h*k@$=|-#AUCy%Fp07SK@BN zRn?W#B&_z1&I(@%JEWqw2V+C>EIerT?R5iLg=}uO2iVxMWsh37XWZMuB-t1_?qVsM zIl50SE^$`MZuGjT`Ypvj&xNGWpTtvi`&$-GKM#fU5t`90Of8Y->>=S%B1trwa4~k3 z^imOn)RNk8seR#jh2pQ}M$UfJ4?;!x=+fKT1^?3Q`7>*T*|r~%k6mX_wkV!>LuWO1 zVUij%^@l`RDagzc$;r%?h^N}lq6n0UUz3Ccug*GCjPB^u7}4MGNogWNrx{*L6*e%W zebT$G@$JvRVMYPfex~m~q#p+=Af(b}&Qt!I2K50Xjt+R`|e} z2GBdzPp$sLTWR?*K%M5X1?9eRU>Ia|%tF#MPpt$pntx<#P304;O(~Hq?FqD)?(Mc`&&?(3cwS(Yl=*HTp zq-)x-|6gVeo|EG_p>$Q< zc$1a;v0eE8B9Sqp7?kg1h1|Lm&-<>WG2Elkkt=B&<0OrMhe*BQhBlhfL{BPh0BzZ0_#Qoy|`i+U{{^@P(@>Jgp%;rC- zpuW#i7F9aICY87k=24Wo^UTlF_^(l&rrKXWeK_0ymp6PH)qft|w4eSnVgFM$$*n*5 z0qOq2B3mb*^P|n@2krjPhcGfp7+?6c7Z2Q1xHQ?JUv|T{QzFIGBeeNs zZ?f6{*Pb-FMEbMc*iQ0_<8yn$h9EsaaCpR)AlJOMe1dVMR(=Vxr>?Qf zNQS?6-UEGx(o#!*nHDd$XHZ4L^*=G{(Dh(}msei^V@J_B-f{S6V_UsO;=C&kR1n5B zT;t}7-rK|fTjv7!$7iUle|hgY7Ji7w^P$~BXIBDL&x;C6HO|9bmI-U9nG(+O^D6wF zE)~a;KAB}LbxH_C_3iIc?X?y;Z$PAOH&@Wm+Tn+p+u>>P%7s)MjaikIm=;p{KQ z`}dM+;9IsgLhUohqXb-jC&RmYV5BX6VC~th7Eujn%T)dzEk&P%olG~aelHjWWqnGp 
zxM5U}5C6P=7^6K-bblEEi@ObY@$HRlnX&Mq+wf_(1%-dP$nMA@?Ls+79XQv~KhR1x z6%{+cbm10^Veh@nUkB|gC6a_K!@KUc$q(Iz*`R?j%*-V@|1ySdQqjFKsHwnsM6Rc~ zE#U$sd`tJi{q=U*N;iI4yPJEmfLR9seHW{;g*N|uI4|omi@Dzc0UFQe`JGZ z?CtCzrbWn-YZmuG?+-GiRB%$vti))xJ^=3uA$_BEH{5nIiPvJ!b`ln5_)KPYJ&|85 zIMoh+wYx|wtj%AkW>#!Fi8}b?r&HGUsr)>OXi3KH zySG{cyi?VbO4DB!FjdC{QXF#yNPBN?efkj0!aUmbl8<)S{<7_r9#;cyb8f8_gzpAM zeHMR%0k25IMlyc)m&-3ZDOJvdyP_V6s6$pHmc#1yD;R5{j>+Ih<-%Y7m@Lq@dMBOD z{X&5K3W8C(IF=&;x5-Ud@Za3p1RwsG4$R=8VqhV*ffT{XHTuqTFa#MBsOc)93fxMA zs&|`+ZQFhl(mTc`(2z~OkvmVRF&GMC$JDkgK^WgPKvE||mz1F}9~0T{HcaVBH7Ef^ zxd2TnQZ}5Wj5YMlem7n7Rn;hgsUo>PUwDM` zejiQL%gu%&+7okb->G9rQh4)7=A}M#sH@tF-RCKgqfq&a^OwE$?H!2{u*+AtJHc%K zylK&IPJ@Y_-k$lNhyR|Q|4i5~w)y|hO`=o0%Q^GGmQ%CS-~+&D&Xq!8y$!UH!vGpz zS?G}mpyOo7nx+AYmLIqn*@SfhxHHxmpwo(a0VhfoBs}e6MC^^)=|NR2L-6XmNBN)= zE!1bniZLP+JQCytnpmkInIi@uS>=?w_o^sS3lUKe=xK+vkiK45Z7W>z41~GdrOk%H zf)lxYqucML(sAX8Lq~?T6OG(qAY7yGK=GPvzlE#`;4)P~>Rq$Z5HeA@Ko8dAAOJXz zr;!r^RSb}WBq|UR^l2u1h!|KyU0XuZXA6`7(;L;d-)vD@)IOlz46>JtxOE;Q+^YKH zaAs1nd@cl+(b#i2q;s<1B-`Ms85!!wXv_EsT3J|lFp336!j)+eXJ7H}^w_t}C8dag z@)XN!(?J@r&e|VVWco&ojBK@8NYeG3h49TwA3+LTB_lUhtB7K#Rp#>0 zAjRqV3+|vM)CTLOAMH2{O@y9m6rCj{>tI3h(aDHRDm^{vmW^b9#*3=fZC8fbC3Ty( zcbrWm#{lrlM=lp6HrmkzGe2oe6g>?(nd3-QbVH9jBl_+(xc_2-s%+aTz z6U5q6{G#8a1dtlj5Yv=Odo^wwhG9?5q!Tn#uCMyY56RF+QZ5t~ntkz7;(b)cR8jDS zf?fzQSlm5Bst2(|+XK0Ox)WfFMJvFqk zFY0h$S97)+lbQerv8&i>!4_rPj6n0sz zFW_?Xjr(q|WB?7q$43;r!8Rm|>kTGLgkVK_4ta6VFH20QkAVv~x_Brx9ZCTWv{R>{ zI}gW1Q0yM0M9gp=>BP-S8IkH5eeu_(?lL{MO5(8b&EP=)$FvdaOW6fo3cHS8lX7qN z=nsp8*mIGhYlOkU%X777;GJ<9vQUT+b)lq|%&Zw&kr4nbyF6Jm8!SW}IEe(<-Wjta zpzrK9Pg1oWU3BUNU8j4U0Au~>1;J%8JI|)~b1_5@b2ux~Gr(AXv6d>^yB0vlKzS`B z*DbDg()sain#{S+hMk?wIMs5W+5!h)OMXht7A=JyZmIOHy#fT_T4wuyIy1@V{Uu-= z!cE7<(xG+tDWQH8b)EYS?1N(!q6u7s7%d zo8)m%E&Nz@tq4T^wa`wf6(n+5IXkIF+2|g3p9HB%I^EBnl`CtrhcGtb-j7GB)*7qC zo96*Io%I~(GKV60Jm}3-IRz_MI&&qAwR3%n0q;kLbYob^BV9WB{k-sS$GB6b z&1GZReZ9pZe~($+-oM=J8)V+N5QrEG&&0GBA8hjNS?p+q#Fm`1dOYWv=s-F_R+Tq4 
z*K>p`C?!%a^Qx`S2$wPbD_j+wVTB=@M{sRGE$hDb!Lg{Hl9V{Y=W|NS+4Ka2){g6I z02Chk%GLy`YQ*yK_Z0ZC=~}QP{R^i8Ok8>c$4A=anaZjTc$(+_PQKOfrl0%pXTq|Q zdy}HQ&t_Sgot@xB>vSAWmJNJq@)W|CLqpjqbT?^)8vizl0zZw>Lz@3D%RX0-JA*emlU z%!mgs*B2i2PUY$&2bt`<^)eo5Hpd(rHF)}t3YKBQ5W7aa%0*;>nh?7q?mBTymOXdD zZ$V~HY*fXR`Er4XgFc@(WAY?>*&md*0Z~U_J@j4h$BYgwPEI;|liG4~k z+r|*IJqF0J?f{DJv1(60C;|N?W*>cWUR-5iT$YoNH;Z%jUd&5qy533zrb0$ucdGL` z0JfD;o+5kuGWT9tTTDZ0(8fT-ml*fC*a3sp=iZ-hF(&gAzGv=q$g}iZWl6gcX~kNx zR53$eRl1n4i?HIiHtNhBDv4K($2!mb9X6mJ)OkMx z;9G#=vhlSSJ0g0pAN{#LQH3E*K#s=Am|b@mfD-NmHS&=&rL+&V%ufN?P)+G02;8Qn zM6YFqnP2EmI9(c#!axn7_p~B3?j<$Dag?3`OudRmvk#0I6#9lpFPkk5toIG1a960* z;ns>L)FR(7C$wB!D}9nMBt$bR%9f%JjFPz-L}ZaD=>zFJz{JSV98x|nTG>wlpS)~M zB-s0`e(j`RKT5@`FB)e(E2f=ut#u0( zFm4ncbz=Rk?d(S-7BW;dEfr+VcYay%vtClRcUK1tTV?0P1k*IQAh`g3<6V$Y#9bj* zXFJguOoFd?pvkvoyDODS1^VY4sELQPsDGknphe(C8)?iaSqG9E=V-hUy9>|UVo>M_ z%cKN)_wJ(LK{fgxw4)1Ol>I<}TUSZoGM_J=vb=HDWc1A1%XgZ+_mzXAqEP>KuIVpP zm+{HzmT-Bg66Yj|Vb})Qo^IE0@+$CvoBE7^LXpPpdc)u3TJrI)&=w;;jpp8mE4mZ6 z$Y`(LXjYHhRl-$kbGBXlxKYXjn+GjU&_r^Wz{ADJl9OZ**k-%Xy$(*`YJQsn*9c2~ z)KER=x6ZUw6M48k1hJkUg1DKw@Ak-guszmfOSdP5a2KsX$0q@PY0xY6+$QpA=a#A z&pQ?$BW8(=(mv5vlaDj4cL3i~MOdkux!>1o>Hf%O*qNnKwZGnOEs(E&c9dJyfyF~F z*)q1#j(Ygt4(?%0)pBYn^={FDCS~FtpFj!c3J#@bnTQ9mP(okG@f1 z^9UUCUiC4Yv?&hid91#L5`2SP~EzRhPWkMggeX1HEc7Buj*+ed9Q%*UTI z9}iF2nXYDNbfu+-G0sBdKK+>nbQNMR3R_-fp(E%`NYIoo7l9Rgv#?M7MJ8s-hV&|n z;zRY4N&NBFHWL@pc5G;Q#AK`vzhXekZ26TWHTLPQ#Tk4$F0$@MqkeyzO9hqdSMVG{ zjh}+Fjor~3!=XXk3(DFe+E_t}6vFgyEnX)!ueUR3uSoB+{0%olXbz?oWaInxc^{`& zvy8k*7j){2bPlPo6WeP>(X;R;Wzq5br$2Q~t-}3F_a$D}yn)TP$)2`Wxw^O&NvUe# zvnS#1Go;GgVhGnE&D+<1<2~@v6>qWPlRAP>F@vC~rzYBXH6}#GhDM53KT26xSMAkW zEV$vVvGB3QyZ4LyCTf`3D_7rUN*zo~m@mHd%s4`75gH{(4Ad`WuKUMInc`gXX7I8G6~+6FqOsx*nU_95j@Ln(ym zQbfI-^(E^np=EQC`tT86J&+`CogY{ADYclZ&uNaQt~cg_K4ca>JAux5O+w&OYcRl3 zOj}di78t))?}~5U(VO)Fz+n$np!?dD*lwdkIO9ka)(W8h0Zov6BZa$Izt}5N*mIbL zkwe^RSa8Oy!8dL^iE-UxJjbEG3cSEnm7Is^Clzr3J~XfdJHvHaC5_HdlE_{JO)^sI 
zEq&VL*;BsWppE+Ix_)UYTVe~|s=8%&4QYV6qS7`uXzmkraNI&)>0Lz71MXfCwy4!^ zT%}gAm(8ohBA+Zq%}Qdsk1zclOYuDadWXcm?V}bcqpYT0V0-^o?R8|DRHsS z93YgI!`ED<>T=nbguLxVca}eV-|GhVzoXzc^Qc|uCZxYdu-pqNVI~u<;KYQdtRBz$ z)7KG%#`${t$L}bM^yND_-I!)Jg3_YN@kWuAntqS$!pg&Y`H+Q5 z1%M)-E!1PEME3U0K<&d-!TH|ibU_l!ePxMAtlt50u{FejMQR7Pd&>Iuv(Pm9-lB{y zg#;2g#h+sYzA2rsy`e9ed`+Esv+dL#K^@YW!7g!959(vHf)ssDPizF2fH&9a(uusq zK?wo>A{$b-!Qg`;W+hlf#VaLIibX|vU(j6!j>MsMvHmZ?-=bz||ERBT9M7f&33nei zr{YLOpePQKsY~UkGZ^i5A~?+_eSG}VB(b}{s*!TX^2EKD{alt6657Vvtu=~m67EU1 zdD+#Sc{E}&?ROi{Y*lr5%X7;3T-=|Ls>@h@o7B3Ay zy6Zxiubw(=6VBrvCDR_Fjn(m-o(>EnCQ2EMp{VlH5 z*O-~Yg>&AcLq$@T=f#cOhxy8u9)$%Ux7de|Lza=f+?uX>AID(M(RAd%Hnyo zDJCKTCkbxJ?Ydm-rRdxQ&h?NJkZ5wUaz2-8Wq3>N9SrG|dbj+R`+=M+e1|XUla`~- z!F{cojY~_oLEUZ#@BLZvt1d0OvlsIJ0m-ztfQB+0$%FPAmNn zusaNUR+T>OBPcnYq^a2e4MU?6zfhu>-OHLKV2rU68K|UWHl}FzG8lu`?NJJZiaAew z1XqsEvnl6`i{4t#SK2OT8|9^l5oc;Im3n8+YlHGQHBpH5r+Uze0TY8dpB~c3+&TqV zZWH5z;3tmR6Ltj^=|+M3mr+Dr<3u4rt)T9!k@?w&JYt702xES6$+Rl7X|={$#AB}PqD`Gl(hK3^ts>eEV=|nU+fynbAQWIL%MIl?@n^6cI1p@2T~2g(mNisEkin z+l7d?>@8L#??A{(vQyy9i03YzujXJkn&-x4-wmGp1X=t3F4D&dD;;TjtTq}aXdURy zAxDy%57%KtWL&X8_dHCr1o0dC+lRR1qsraZIs?1Pc|@JhN~hJjFm7$~mD4))Dc!0> zNHqJhb!~}nt8b$1?E@s#6xubD; zL8H}@v{yjhw$%}`hSav1=If{(f%EBX@m(V;<3}#na00gNUfi|5&FOc5xQ6dAy&Uc| z97R}g{ARR(mGYV2I=evY$IyV<$d;8cgVS;*H^W^uysB)D^FBHXGO1Z}*G@m31mRHL zEHR^1lzm?1y+^;7mz>nteLRK3u4Un}U#vF@XmFYojSKBUHTN{3$bFgJz;$y)fnSLP z2&tBqt_WP;MsT(fhr3bnDRgLc-ctlhx8!2Nm(G#Kf}s8QZtF7Q0&qzif4eO$!~-oy zmXEnh_mt@}QmuIrtc?|s>-H^o!fe98am)zZl24PD^~0YF9@4SONKv?zaqB?+2C`t~ zxA5IQD``Fo&Z&hNb3)Wq$}~KoQDSJwr{J!z&T!GmNzmh$Nk0IocTs%lmeRu|s%U7D z$QjH3h1~e-*_HJfpMg9(Nf1XF2Vq8zEJ>0Kgbo2UkbkYM8_3GF?Ph<(f5~>Ks+QCv z^lYjH{%^gKlA(oAg^!xK?wS&o#)0)n2TxsmJL-C#Oxtv1^77fLCXoU0fvzu@JDzpl zv{>#I^*Sp0zkg8Rzumfmcy<>N=DSdt`B9v_$>Mdg?&7aEN#-A=*zC$!zMLwpJ z2RohVgF3C{nw|_-T5xk$p4upo;B%mF_@G|6 zCA8&ZAV*b3eIuNQd=CT=S6@UB%h+V{(8<5uvwG>AIOR2vZxpdrP)aR^G^}u6SHm0H zL1U*Zm5BgrHxQja-YcZ`Bz$CpxA(c(E0q3rf~Y}%By<`2-Wx|cfXM42J; 
z&9C4iWe6Te{m_zeaC`PKSXHi%!o3OE-74z3+|u)S$dyY%U;l zB%mfY1&Yh#(Ce8)d|J_Xr41?T(AY8d@Q)D~CuJjM1Wue<(f`zy8Bg|C6~WeC5qg_= zTecq0o5*|`8ENS{goH>?9n-~CQQ(CkU&8MeWoNvwui%^;xaYCkk<`4KOlpq#{DVJ01r=&)-UVWlK*hC)_zTq~^C za>>^`p%j@0Q^a0C_aQ;26_EO21J5QuIZHl&_tmQh!_LzejdA}d zVOn+fM)Jg0RKF?A9A6@nLuo&o((KrXZ_*+Cr%6&HckH;_;jGIrSAT|HgmvRM)T+cjY7EkxN4FyG5g_u~iIzpoKw6Xoghffv+ zo)7Jk_>5U|4{7bI(D^8%PaDpI+@rY$CH;zlgPjhDqq{sjsl;(&L+6(Fdf-ezf^6|&1Trn^KDW3XUN#;#~Hx-mB53w8-GpvN<MA4-!F3Wd@WBm~TOO#^9>!g2{T7W_w0lk( z&&A;0F0Fnw{@4)Yb|g8D>001d$%*u&HmKLcBa%`|{lWgm6wUbMlP)I_OW{U!)x>a# zfIpIQdOHyH>alhs9ODYd!(Hx&7`lST4{iQ_GACY$QCQL(E^grbgox9wRk6jK#C6EM zXHush?YRNieGWGH0LlUa?*N>PcI0q~lAIuUB5>rSuVSd!D#FvHygIKCh~%9(WY`P! znB{1~VhYNBcVF!x*e7#+U4pxP{$aBxo9gKPFr%E3Pch5G(R^+%$khVn)*;^?fp*Gp zlG#(EfY3v$P@lS1)|8nu89}!^Tua;2ic0t@sR*8|#QR z4CpL>86ZMippo&@mo=O&2G6-s++%0>xce3q{&L#d$xQ1`LRxbU*9Fq7rwXCn+LvQB z&gwMTeOHPf#_eCTu72ym}bC7)E`jEW@I!9UhuT%4_y8CSfUnv*r2Mis#^m#;$Lc8HUf{5T^eh8t%x zmKWJBX%gG`@*qh~NrW)6hkNRDV zKxha(@Ga(EN#A55F@cK{a*1z$%hc?qTy|s`qhD~r{_eQi43G@!GZh&ImpkkN-v>(> z@kK(@BM9JBl*GLQj|Q2T$0?Fp40$o<#Jb2#-%%y?HSKWFW%z=C7q7<_T_9D2m8+Tq zgxHDOx)H4Lj`!#;QeMl&)_8n`M8uy1AQRd?mw1h|hK28j(ky5-5Q}+YPr8sUe{?=d zuq-(Lc>yUYm?r_RO~8H*XmJmjZ58tK#y&|sH9ce-vUs-%MJXZEyqqG15{}xk#-?fw zE>Y2fju|PDZw9Wvdh(7{22;1U&h2GN=ZSl`X2>3d6Duk}kA`hwhG4wjX*_~^{m0#- zZTRy6th7-1wJ^T88x+;Jmf2bMkC9B=&KAfsRk! 
zUOZ`h5O=PpcAbqggPv`(Z*`EW=SQlh813Teth{$I;ljC)^08%PH;U4X1VBjA^Ot+L z@(q2sn*Dn63ioiv~QRa6aD`kvxj?mn{KIW9c-~B8*)AYp4%w_)o z&fw5qX~jUAV2xva+!uY4?`M?$H3Q0=2BHe(R)%rm0ZV}>cf5Rj-eoa_ow6U?CRLoIrT-(4wTH-+&;g&}4AMu`+MHB*h;xfG2x;w{L{JfE} zp=N1k*oiM47t7fxx>m}1!Hr}EM6>6wzJ64R&4t-zX*Fs^xSX{w;?6e zC>IbnP*(YdXnzYCCM#ZbP2pd2#GyI>DdQl@fh3b zhSHN=pmG|Is&OM67e!CiLpd~I1Jb6YVTOuJe&QXW6CC}G%yDmYR+8G{YHe+6<_9OF+=8V(P8M4r2x~E1KI5Xz+CB8w@7~T?`BzEsYJ$Z{iI3U=a8+kTOu#W^o2PjK~@TqiL>I>aUHGW5^0Yd6|Ao2`$Wzj zYhLOf|Kh(G;GQSAS2~s}d%z+oXip>94!i3?vL+&q$BW8p%$x-D1lc45V?`@;Afyjv zmc2%rYfA0bYolZF__nBoM!AnS6mH}|XgbZhyab~&@xQrUnqXoWf-5d)PBZ(Lcw zW=W94bw}`ya}6Z+)T&dV0J=ZKt-__O#+uc0=!$xI+UvhE1YVX;9KhWejIuJljl5IqxG*){$eZVbr#N+W!oR~!k-rRJC?(C{DRgxP0p@5u$Oijz z^t$#H;v<#rX1wIm4WoXW2iV$nRY}9AUjdxjQ!lea7<=&={Sv$5PDY?*262DYwC>5& z=$aw$fw)W1Gu!UhHjc$IG=kP^Ly{Y&rVu*Vp5N%L?>mAsh@40a-LY$~yh4{hk8;^8 zkLePN;-4+~ISmF6mEYm&NoIrUA^EaWPm&S@QdiHvK#pu7%Mz(G3c8=_-H?^M|WfVoQ7uyX-Nkb+l zQTl2KQmwW%1-PfIIjz6#JDEuE92hJIw|02FiEYNFyS)C87x-{OSSWHe%L`kTg(^qK z;@a~c_;WEr4P~vM1bSZ{Vt9&Q4q5sFsB9r@tY?s0wvqv}v1fT-l0EH`;;jlmN-OA(J3Iv$< ztA=jcaoF9nvt4Fkr$2>+FUhy>tUQb-zPRzOla|#D%2O#waR#X;n4gJVf4XR3gv4`8x6YcVKyoY^$|15GtKSeri~71 zX}0@oILX^|*gh*Dw8_-?i+lWAR>wfucLeTh@pb?~D$fqZiFbpiqM9s6L1oLV(9=qk3n!hDm_qTOnK_LAn>_-0$A10?zI9KrVsK*#F z2m<#FQ3`1FaoAJJ5Gfd48bS@~1VKRNCE@v{mHBbYyYwz{fF_hjKrTyNIy5;Z^Q1tA zoq04V@V6uJ?T0ai4-ix-A~8jGRA}$KjtYRBt_UTMa*=(|*T53Ntnx$7qM74ALZTKB zSbL*OaZ^|WzrPS$jYMMKoSAZ=KE}?2K#GzoLGO%`ZX&Es@nz_`Dv@0lPskTxETR|ED*DL15wmtc|mFEUpNl z!hn9l!+ucqWzt-zHX4kD?LZHPr^u6gJ`R5Izp4ULk&Vfvt` z?y3jatzj3!BL@g>Tbl9cKkn8R zBD`tcCpiE9mXPs##()LrbEp5OKkRn!f#9}nBo6ZD&uDCcKmz;b|JCn*_E~PpXv>oG zZzJIY$nLJhh?wt;!E`pDoM#?9s;$7}ar6%3<_!WAAcP&26jqG=z8AvSU@us9?Ax5~ z$4rnQDhv|uTA&aA4w3#nu^JUX6td$Z_YO&%;<|AyrESla($kO4hG{r>^0iq=82Plt z)^qy3%F_ZwDVJCi|ILx$_kRQDV;HfGa*C4v`Snt Date: Wed, 11 Dec 2024 12:39:47 +0000 Subject: [PATCH 19/19] !fixup address latest comments, thanks! 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 22 +++++++++---------- llvm/lib/Transforms/Vectorize/VPlan.h | 8 +++++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 13 ++++++----- .../Transforms/Vectorize/VPlanTransforms.h | 2 +- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b47386389c82c1..ed00c844285c62 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7859,18 +7859,16 @@ DenseMap LoopVectorizationPlanner::executePlan( ILV.printDebugTracesAtEnd(); - // 4. Adjust branch weight of the branch in the middle block if it exists. - if (ExitVPBB) { - auto *MiddleTerm = - cast(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); - if (MiddleTerm->isConditional() && - hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { - // Assume that `Count % VectorTripCount` is equally distributed. - unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); - assert(TripCount > 0 && "trip count should not be zero"); - const uint32_t Weights[] = {1, TripCount - 1}; - setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); - } + // 4. Adjust branch weight of the branch in the middle block. + auto *MiddleTerm = + cast(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); + if (MiddleTerm->isConditional() && + hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { + // Assume that `Count % VectorTripCount` is equally distributed. 
+ unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); + assert(TripCount > 0 && "trip count should not be zero"); + const uint32_t Weights[] = {1, TripCount - 1}; + setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); } return State.ExpandedSCEVs; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 37e86cd6171422..7440a3a386fd2d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -621,6 +621,14 @@ class VPBlockBase { /// Remove all the successors of this block. void clearSuccessors() { Successors.clear(); } + /// Swap successors of the block. The block must have exactly 2 successors. + // TODO: This should be part of introducing conditional branch recipes rather + // than being independent. + void swapSuccessors() { + assert(Successors.size() == 2 && "must have 2 successors to swap"); + std::swap(Successors[0], Successors[1]); + } + /// The method which generates the output IR that correspond to this /// VPBlockBase, thereby "executing" the VPlan. 
virtual void execute(VPTransformState *State) = 0; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b57b28ad101664..e27c1bfba93525 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1898,7 +1898,7 @@ void VPlanTransforms::handleUncountableEarlyExit( VPBasicBlock *NewMiddle = new VPBasicBlock("middle.split"); VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle); VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock); - std::swap(NewMiddle->getSuccessors()[0], NewMiddle->getSuccessors()[1]); + NewMiddle->swapSuccessors(); VPBuilder MiddleBuilder(NewMiddle); MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken}); @@ -1906,13 +1906,14 @@ void VPlanTransforms::handleUncountableEarlyExit( // Replace the condition controlling the non-early exit from the vector loop // with one exiting if either the original condition of the vector latch is // true or the early exit has been taken. 
- auto *LatchExitingBranch = - dyn_cast(LatchVPBB->getTerminator()); + auto *LatchExitingBranch = cast(LatchVPBB->getTerminator()); + assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount && + "Unexpected terminator"); auto *IsLatchExitTaken = Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0), LatchExitingBranch->getOperand(1)); - auto *AnyExiting = Builder.createNaryOp(Instruction::Or, - {IsEarlyExitTaken, IsLatchExitTaken}); - Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExiting); + auto *AnyExitTaken = Builder.createNaryOp( + Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken}); + Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken); LatchExitingBranch->eraseFromParent(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index f7b09c1311ef7e..fddde868911665 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -128,7 +128,7 @@ struct VPlanTransforms { /// UncountableExitingBlock by /// * updating the condition exiting the vector loop to include the early /// exit conditions - /// * splitting the original middle block to branch to the early exit blocks + /// * splitting the original middle block to branch to the early exit block /// if taken. static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,