diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h index c3643e0f27f94..d31d0af00a911 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h @@ -13,6 +13,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H #define LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" namespace llvm { @@ -32,12 +33,14 @@ class TargetTransformInfo; /// header. If the loop header's size exceeds the threshold, the loop rotation /// will give up. The flag IsUtilMode controls the heuristic used in the /// LoopRotation. If it is true, the profitability heuristic will be ignored. -LLVM_ABI bool LoopRotation(Loop *L, LoopInfo *LI, - const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE, - MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, - bool RotationOnly, unsigned Threshold, - bool IsUtilMode, bool PrepareForLTO = false); +/// If ProfitabilityCheck returns true, the profitability heuristic is skipped. +LLVM_ABI bool LoopRotation( + Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, + DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold, + bool IsUtilMode, bool PrepareForLTO = false, + function_ref ProfitabilityCheck = + [](Loop *, ScalarEvolution *) { return false; }); } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 765c613b04a44..395e78f3c1f8d 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -54,11 +54,14 @@ LLVM_ABI const Loop *addClonedBlockToLoopInfo(BasicBlock *OriginalBB, LoopInfo *LI, NewLoopsMap &NewLoops); -/// Represents the result of a \c UnrollLoop invocation. 
+/// Represents the result of a \c UnrollLoop and \c UnrollAndJamLoop invocation. enum class LoopUnrollResult { /// The loop was not modified. Unmodified, + /// The loop was modified, but not unrolled. + Modified, + /// The loop was partially unrolled -- we still have a loop, but with a /// smaller trip count. We may also have emitted epilogue loop if the loop /// had a non-constant trip count. diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index a22d84dcf014d..8b1ab5a9e2181 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1357,8 +1357,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, ULO.RuntimeUnrollMultiExit = UP.RuntimeUnrollMultiExit; LoopUnrollResult UnrollResult = UnrollLoop( L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA); - if (UnrollResult == LoopUnrollResult::Unmodified) - return LoopUnrollResult::Unmodified; + if (UnrollResult == LoopUnrollResult::Unmodified || + UnrollResult == LoopUnrollResult::Modified) + return UnrollResult; if (RemainderLoop) { std::optional RemainderLoopID = diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 66d0573e83f65..3d93d8a1b7d4c 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -69,16 +69,19 @@ class LoopRotate { bool RotationOnly; bool IsUtilMode; bool PrepareForLTO; + function_ref ProfitabilityCheck; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode, - bool PrepareForLTO) + bool PrepareForLTO, + function_ref ProfitabilityCheck) : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), MSSAU(MSSAU), SQ(SQ), 
RotationOnly(RotationOnly), - IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {} + IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO), + ProfitabilityCheck(ProfitabilityCheck) {} bool processLoop(Loop *L); private: @@ -440,9 +443,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // Rotate if either the loop latch does *not* exit the loop, or if the loop // latch was just simplified. Or if we think it will be profitable. - if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && - !profitableToRotateLoopExitingLatch(L) && - !canRotateDeoptimizingLatchExit(L)) + if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && + IsUtilMode == false && !profitableToRotateLoopExitingLatch(L) && + !canRotateDeoptimizingLatchExit(L) && !ProfitabilityCheck(L, SE)) return Rotated; // Check size of original header and reject loop if it is very big or we can't @@ -1053,13 +1056,14 @@ bool LoopRotate::processLoop(Loop *L) { /// The utility to convert a loop into a loop with bottom test. 
-bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, - AssumptionCache *AC, DominatorTree *DT, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly = true, - unsigned Threshold = unsigned(-1), - bool IsUtilMode = true, bool PrepareForLTO) { +bool llvm::LoopRotation( + Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, + DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + const SimplifyQuery &SQ, bool RotationOnly = true, + unsigned Threshold = unsigned(-1), bool IsUtilMode = true, + bool PrepareForLTO, + function_ref ProfitabilityCheck) { LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, - IsUtilMode, PrepareForLTO); + IsUtilMode, PrepareForLTO, ProfitabilityCheck); return LR.processLoop(L); } diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 86b268de43cf6..a3c6d9cff24f6 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -58,6 +58,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopRotationUtils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -486,12 +487,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // All these values should be taken only after peeling because they might have // changed. 
- BasicBlock *Preheader = L->getLoopPreheader(); - BasicBlock *Header = L->getHeader(); BasicBlock *LatchBlock = L->getLoopLatch(); - SmallVector ExitBlocks; - L->getExitBlocks(ExitBlocks); - std::vector OriginalLoopBlocks = L->getBlocks(); const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); @@ -504,42 +500,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (MaxTripCount && ULO.Count > MaxTripCount) ULO.Count = MaxTripCount; - struct ExitInfo { - unsigned TripCount; - unsigned TripMultiple; - unsigned BreakoutTrip; - bool ExitOnTrue; - BasicBlock *FirstExitingBlock = nullptr; - SmallVector ExitingBlocks; - }; - DenseMap ExitInfos; - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (auto *ExitingBlock : ExitingBlocks) { - // The folding code is not prepared to deal with non-branch instructions - // right now. - auto *BI = dyn_cast(ExitingBlock->getTerminator()); - if (!BI) - continue; - - ExitInfo &Info = ExitInfos[ExitingBlock]; - Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); - if (Info.TripCount != 0) { - Info.BreakoutTrip = Info.TripCount % ULO.Count; - Info.TripMultiple = 0; - } else { - Info.BreakoutTrip = Info.TripMultiple = - (unsigned)std::gcd(ULO.Count, Info.TripMultiple); - } - Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); - Info.ExitingBlocks.push_back(ExitingBlock); - LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() - << ": TripCount=" << Info.TripCount - << ", TripMultiple=" << Info.TripMultiple - << ", BreakoutTrip=" << Info.BreakoutTrip << "\n"); - } - // Are we eliminating the loop control altogether? Note that we can know // we're eliminating the backedge without knowing exactly which iteration // of the unrolled body exits. 
@@ -552,17 +512,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (CompletelyUnroll) ULO.Runtime = false; - // Go through all exits of L and see if there are any phi-nodes there. We just - // conservatively assume that they're inserted to preserve LCSSA form, which - // means that complete unrolling might break this form. We need to either fix - // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For - // now we just recompute LCSSA for the outer loop, but it should be possible - // to fix it in-place. - bool NeedToFixLCSSA = - PreserveLCSSA && CompletelyUnroll && - any_of(ExitBlocks, - [](const BasicBlock *BB) { return isa(BB->begin()); }); - // The current loop unroll pass can unroll loops that have // (1) single latch; and // (2a) latch is unconditional; or @@ -587,21 +536,116 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog : isEpilogProfitable(L); - if (ULO.Runtime && - !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, - EpilogProfitability, ULO.UnrollRemainder, - ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, - PreserveLCSSA, ULO.SCEVExpansionBudget, - ULO.RuntimeUnrollMultiExit, RemainderLoop)) { + bool LoopRotated = false; + bool ReminderUnrolled = false; + if (ULO.Runtime) { + // First try to generate the remainder loop without rotating the loop. + ReminderUnrolled = UnrollRuntimeLoopRemainder( + L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, + ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, + PreserveLCSSA, ULO.SCEVExpansionBudget, ULO.RuntimeUnrollMultiExit, + RemainderLoop); + + // If unroll is not possible, then try with loop rotation. 
+ if (!ReminderUnrolled) { + BasicBlock *OrigHeader = L->getHeader(); + BranchInst *BI = dyn_cast(OrigHeader->getTerminator()); + if (BI && !BI->isUnconditional() && + isa(SE->getExitCount(L, L->getLoopLatch())) && + !isa(SE->getExitCount(L, OrigHeader))) { + LLVM_DEBUG( + dbgs() << " Rotating loop to make the exit count computable.\n"); + SimplifyQuery SQ{OrigHeader->getDataLayout()}; + SQ.TLI = nullptr; + SQ.DT = DT; + SQ.AC = AC; + LoopRotated = + llvm::LoopRotation(L, LI, TTI, AC, DT, SE, + /*MemorySSAUpdater*/ nullptr, SQ, + /*RotationOnly*/ false, /*Threshold*/ 16, + /*IsUtilMode*/ false, /*PrepareForLTO*/ false, + [](Loop *, ScalarEvolution *) { return true; }); + } + if (LoopRotated) { + // Loop was rotated, try unrolling. + ReminderUnrolled = UnrollRuntimeLoopRemainder( + L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, + ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, + PreserveLCSSA, ULO.SCEVExpansionBudget, ULO.RuntimeUnrollMultiExit, + RemainderLoop); + } + } + // Latch block needs to be updated. + LatchBlock = L->getLoopLatch(); + LatchIsExiting = L->isLoopExiting(LatchBlock); + } + + if (ULO.Runtime && !ReminderUnrolled) { if (ULO.Force) ULO.Runtime = false; else { LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " "generated when assuming runtime trip count\n"); - return LoopUnrollResult::Unmodified; + // The loop may already have been rotated above even though no remainder + // loop could be generated; report that modification to the caller. + return LoopRotated ? LoopUnrollResult::Modified + : LoopUnrollResult::Unmodified; } } + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + std::vector OriginalLoopBlocks = L->getBlocks(); + + // Go through all exits of L and see if there are any phi-nodes there. We just + // conservatively assume that they're inserted to preserve LCSSA form, which + // means that complete unrolling might break this form. 
We need to either fix + // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For + // now we just recompute LCSSA for the outer loop, but it should be possible + // to fix it in-place. + bool NeedToFixLCSSA = + PreserveLCSSA && CompletelyUnroll && + any_of(ExitBlocks, + [](const BasicBlock *BB) { return isa(BB->begin()); }); + + struct ExitInfo { + unsigned TripCount; + unsigned TripMultiple; + unsigned BreakoutTrip; + bool ExitOnTrue; + BasicBlock *FirstExitingBlock = nullptr; + SmallVector ExitingBlocks; + }; + DenseMap ExitInfos; + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (auto *ExitingBlock : ExitingBlocks) { + // The folding code is not prepared to deal with non-branch instructions + // right now. + auto *BI = dyn_cast(ExitingBlock->getTerminator()); + if (!BI) + continue; + + ExitInfo &Info = ExitInfos[ExitingBlock]; + Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); + Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); + if (Info.TripCount != 0) { + Info.BreakoutTrip = Info.TripCount % ULO.Count; + Info.TripMultiple = 0; + } else { + Info.BreakoutTrip = Info.TripMultiple = + (unsigned)std::gcd(ULO.Count, Info.TripMultiple); + } + Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); + Info.ExitingBlocks.push_back(ExitingBlock); + LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() + << ": TripCount=" << Info.TripCount + << ", TripMultiple=" << Info.TripMultiple + << ", BreakoutTrip=" << Info.BreakoutTrip << "\n"); + } + using namespace ore; // Report the unrolling decision. 
if (CompletelyUnroll) { diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll index 7f266a754d1bc..2414206e0ce08 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll @@ -10,10 +10,10 @@ ; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body ; LOOP-UNROLL-NEXT: Loop Size = 9 ; LOOP-UNROLL-NEXT: runtime unrolling with count: 8 -; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 ; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop: ; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body
; LOOP-UNROLL-NEXT: Using epilog remainder. +; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=8, BreakoutTrip=8 ; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 8 with run-time trip count! ; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body @@ -49,10 +49,10 @@ for.body: ; preds = %for.body.preheader, ; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body ; LOOP-UNROLL-NEXT: Loop Size = 9 -; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 ; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop: ; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body
; LOOP-UNROLL-NEXT: Using epilog remainder. +; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 ; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 5 with run-time trip count! ; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll index de54852313456..b079abefaea65 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -16,48 +16,86 @@ define i64 @test1() { ; CHECK-NEXT: br label [[PREHEADER:%.*]] ; CHECK: preheader: ; CHECK-NEXT: [[TRIP:%.*]] = zext i32 undef to i64 +; CHECK-NEXT: br i1 false, label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]] +; CHECK: latch.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]] +; CHECK: latch.prol.preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ] -; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK: latch.prol: +; CHECK-NEXT: [[ADD_IV1_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ] +; CHECK-NEXT: [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV1_PROL]], 1 +; CHECK-NEXT: [[CMP2_PROL:%.*]] = icmp ult i64 
[[SHFT_PROL]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT2:%.*]] +; CHECK: header.prol: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD_IV1_PROL]], [[HEADER]] ] +; CHECK-NEXT: [[ADD_IV]] = add nuw nsw i64 [[IV]], 2 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]] -; CHECK: latch: -; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1 +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: latch.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[ADD_IV1_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ] +; CHECK-NEXT: [[SPLIT_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ] +; CHECK-NEXT: br label [[LATCH_PROL_LOOPEXIT]] +; CHECK: latch.prol.loopexit: +; CHECK-NEXT: [[ADD_IV1_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV1_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[SPLIT_UNR:%.*]] = phi i64 [ poison, [[LATCH_LR_PH]] ], [ [[SPLIT_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3 +; CHECK-NEXT: br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]] +; CHECK: latch.lr.ph.new: +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: header: +; CHECK-NEXT: [[ADD_IV2:%.*]] = add nuw nsw i64 [[ADD_IV1:%.*]], 2 +; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV2]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]] ; CHECK: header.1: -; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4 -; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label 
[[HEADEREXIT]] -; CHECK: latch.1: +; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV1]], 4 ; CHECK-NEXT: [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1 ; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]] ; CHECK: header.2: -; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6 -; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]] -; CHECK: latch.2: +; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV1]], 6 ; CHECK-NEXT: [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1 ; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]] ; CHECK: header.3: -; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8 +; CHECK-NEXT: [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV1]], 8 ; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]] -; CHECK: latch.3: -; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1 +; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: latch: +; CHECK-NEXT: [[ADD_IV1]] = phi i64 [ [[ADD_IV1_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ] +; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV1]], 1 ; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]] +; CHECK: header.headerexit_crit_edge.unr-lcssa: +; CHECK-NEXT: [[SPLIT_PH:%.*]] = phi i64 [ [[ADD_IV_3]], [[HEADER_3]] ] +; CHECK-NEXT: br label [[HEADER_HEADEREXIT_CRIT_EDGE]] +; CHECK: header.headerexit_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i64 [ [[SPLIT_UNR]], 
[[LATCH_PROL_LOOPEXIT]] ], [ [[SPLIT_PH]], [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA]] ] +; CHECK-NEXT: br label [[HEADEREXIT]] ; CHECK: headerexit: -; CHECK-NEXT: [[ADDPHI:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER]] ], [ [[ADD_IV_1]], [[HEADER_1]] ], [ [[ADD_IV_2]], [[HEADER_2]] ], [ [[ADD_IV_3]], [[HEADER_3]] ] +; CHECK-NEXT: [[ADDPHI:%.*]] = phi i64 [ [[SPLIT]], [[HEADER_HEADEREXIT_CRIT_EDGE]] ], [ 4, [[PREHEADER]] ] +; CHECK-NEXT: br label [[MERGEDEXIT1:%.*]] +; CHECK: latchexit.loopexit: +; CHECK-NEXT: [[SHFTPHI_PH:%.*]] = phi i64 [ [[SHFT_3]], [[LATCH]] ], [ [[SHFT]], [[HEADER1]] ], [ [[SHFT_1]], [[HEADER_1]] ], [ [[SHFT_2]], [[HEADER_2]] ] ; CHECK-NEXT: br label [[MERGEDEXIT:%.*]] -; CHECK: latchexit: -; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ] +; CHECK: latchexit.loopexit2: +; CHECK-NEXT: [[SHFTPHI_PH3:%.*]] = phi i64 [ [[SHFT_PROL]], [[HEADER]] ] ; CHECK-NEXT: br label [[MERGEDEXIT]] +; CHECK: latchexit: +; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFTPHI_PH]], [[LATCHEXIT]] ], [ [[SHFTPHI_PH3]], [[LATCHEXIT_LOOPEXIT2]] ] +; CHECK-NEXT: br label [[MERGEDEXIT1]] ; CHECK: mergedexit: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[LATCHEXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[MERGEDEXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -98,42 +136,75 @@ define void @test2(i1 %cond, i32 %n) { ; CHECK-NEXT: br i1 [[COND:%.*]], label [[PREHEADER:%.*]], label [[MERGEDEXIT:%.*]] ; CHECK: preheader: ; CHECK-NEXT: [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[CMP11:%.*]] = icmp ult i64 4, [[TRIP]] +; CHECK-NEXT: br i1 [[CMP11]], label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]] +; CHECK: latch.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 
[[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]] +; CHECK: latch.prol.preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ] -; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK: latch.prol: +; CHECK-NEXT: [[ADD_IV2_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ] +; CHECK-NEXT: [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV2_PROL]], 1 +; CHECK-NEXT: [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT3:%.*]] +; CHECK: header.prol: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD_IV2_PROL]], [[HEADER]] ] +; CHECK-NEXT: [[ADD_IV]] = add nuw nsw i64 [[IV]], 2 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]] -; CHECK: latch: -; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1 +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: latch.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[ADD_IV2_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ] +; CHECK-NEXT: br label [[LATCH_PROL_LOOPEXIT]] +; CHECK: latch.prol.loopexit: +; CHECK-NEXT: [[ADD_IV2_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV2_UNR_PH]], 
[[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3 +; CHECK-NEXT: br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]] +; CHECK: latch.lr.ph.new: +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: header: +; CHECK-NEXT: [[ADD_IV1:%.*]] = add nuw nsw i64 [[ADD_IV2:%.*]], 2 +; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV1]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]] ; CHECK: header.1: -; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4 -; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]] -; CHECK: latch.1: +; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV2]], 4 ; CHECK-NEXT: [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1 ; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]] ; CHECK: header.2: -; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6 -; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]] -; CHECK: latch.2: +; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV2]], 6 ; CHECK-NEXT: [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1 ; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]] ; CHECK: header.3: -; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8 +; CHECK-NEXT: [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV2]], 8 ; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]] -; CHECK: latch.3: -; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1 +; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH]], label 
[[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: latch: +; CHECK-NEXT: [[ADD_IV2]] = phi i64 [ [[ADD_IV2_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ] +; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV2]], 1 ; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]] +; CHECK: header.headerexit_crit_edge.unr-lcssa: +; CHECK-NEXT: br label [[HEADER_HEADEREXIT_CRIT_EDGE]] +; CHECK: header.headerexit_crit_edge: +; CHECK-NEXT: br label [[HEADEREXIT]] ; CHECK: headerexit: ; CHECK-NEXT: br label [[MERGEDEXIT]] +; CHECK: latchexit.loopexit: +; CHECK-NEXT: br label [[LATCHEXIT1:%.*]] +; CHECK: latchexit.loopexit3: +; CHECK-NEXT: br label [[LATCHEXIT1]] ; CHECK: latchexit: ; CHECK-NEXT: br label [[MERGEDEXIT]] ; CHECK: mergedexit: @@ -175,44 +246,79 @@ define i64 @test3(i32 %n) { ; CHECK-NEXT: br label [[PREHEADER:%.*]] ; CHECK: preheader: ; CHECK-NEXT: [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[CMP11:%.*]] = icmp ult i64 4, [[TRIP]] +; CHECK-NEXT: br i1 [[CMP11]], label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]] +; CHECK: latch.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]] +; CHECK: latch.prol.preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ] -; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw 
nsw i64 [[IV]], 2 +; CHECK: latch.prol: +; CHECK-NEXT: [[ADD_IV2_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ] +; CHECK-NEXT: [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV2_PROL]], 1 +; CHECK-NEXT: [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT3:%.*]] +; CHECK: header.prol: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD_IV2_PROL]], [[HEADER]] ] +; CHECK-NEXT: [[ADD_IV]] = add nuw nsw i64 [[IV]], 2 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]] -; CHECK: latch: -; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1 +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: latch.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[ADD_IV2_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ] +; CHECK-NEXT: br label [[LATCH_PROL_LOOPEXIT]] +; CHECK: latch.prol.loopexit: +; CHECK-NEXT: [[ADD_IV2_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV2_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3 +; CHECK-NEXT: br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]] +; CHECK: latch.lr.ph.new: +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: header: +; CHECK-NEXT: [[ADD_IV1:%.*]] = add nuw nsw i64 [[ADD_IV2:%.*]], 2 +; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV1]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]] ; CHECK: header.1: -; 
CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4 -; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]] -; CHECK: latch.1: +; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV2]], 4 ; CHECK-NEXT: [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1 ; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]] ; CHECK: header.2: -; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6 -; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]] -; CHECK: latch.2: +; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV2]], 6 ; CHECK-NEXT: [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1 ; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]] ; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]] ; CHECK: header.3: -; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8 +; CHECK-NEXT: [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV2]], 8 ; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]] -; CHECK: latch.3: -; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1 +; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: latch: +; CHECK-NEXT: [[ADD_IV2]] = phi i64 [ [[ADD_IV2_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ] +; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV2]], 1 ; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]] -; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]] +; CHECK: header.headerexit_crit_edge.unr-lcssa: +; CHECK-NEXT: br label 
[[HEADER_HEADEREXIT_CRIT_EDGE]] +; CHECK: header.headerexit_crit_edge: +; CHECK-NEXT: br label [[HEADEREXIT]] ; CHECK: headerexit: ; CHECK-NEXT: br label [[EXITSUCC:%.*]] +; CHECK: latchexit.loopexit: +; CHECK-NEXT: [[SHFTPHI_PH:%.*]] = phi i64 [ [[SHFT_3]], [[LATCH]] ], [ [[SHFT]], [[HEADER1]] ], [ [[SHFT_1]], [[HEADER_1]] ], [ [[SHFT_2]], [[HEADER_2]] ] +; CHECK-NEXT: br label [[LATCHEXIT1:%.*]] +; CHECK: latchexit.loopexit3: +; CHECK-NEXT: [[SHFTPHI_PH4:%.*]] = phi i64 [ [[SHFT_PROL]], [[HEADER]] ] +; CHECK-NEXT: br label [[LATCHEXIT1]] ; CHECK: latchexit: -; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ] +; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFTPHI_PH]], [[LATCHEXIT]] ], [ [[SHFTPHI_PH4]], [[LATCHEXIT_LOOPEXIT3]] ] ; CHECK-NEXT: ret i64 [[SHFTPHI]] ; CHECK: exitsucc: ; CHECK-NEXT: ret i64 96 @@ -265,15 +371,15 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: br label [[EXITING_PROL:%.*]] ; CHECK: exiting.prol: ; CHECK-NEXT: switch i16 [[C3:%.*]], label [[DEFAULT_LOOPEXIT_LOOPEXIT1:%.*]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]] -; CHECK-NEXT: i16 95, label [[LATCH_PROL]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH_PROL]] ; CHECK-NEXT: ] ; CHECK: latch.prol: ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 ; CHECK-NEXT: [[C2_PROL:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_PROL]], [[C1]] ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] -; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: header.prol.loopexit.unr-lcssa: ; CHECK-NEXT: 
[[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ] ; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]] @@ -288,34 +394,34 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: br label [[EXITING:%.*]] ; CHECK: exiting: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]] -; CHECK-NEXT: i16 95, label [[LATCH:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH:%.*]] ; CHECK-NEXT: ] ; CHECK: latch: ; CHECK-NEXT: br label [[EXITING_1:%.*]] ; CHECK: exiting.1: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]] ; CHECK-NEXT: ] ; CHECK: latch.1: ; CHECK-NEXT: br label [[EXITING_2:%.*]] ; CHECK: exiting.2: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]] ; CHECK-NEXT: ] ; CHECK: latch.2: ; CHECK-NEXT: br label [[EXITING_3:%.*]] ; CHECK: exiting.3: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_3]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_3]] ; CHECK-NEXT: ] ; CHECK: latch.3: ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[C2_3:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_3]], [[C1]] -; CHECK-NEXT: br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: 
latchexit.unr-lcssa: ; CHECK-NEXT: br label [[LATCHEXIT]] ; CHECK: latchexit: @@ -414,13 +520,13 @@ define void @test5() { ; CHECK-NEXT: [[C2_3_PROL:%.*]] = call i1 @unknown(i32 0) ; CHECK-NEXT: br i1 [[C2_3_PROL]], label [[INNERLATCH_3_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] ; CHECK: innerLatch.3.prol: -; CHECK-NEXT: br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: outerLatch.prol: ; CHECK-NEXT: [[TMP6_PROL]] = add i32 [[TMP4_PROL]], 1 ; CHECK-NEXT: [[TMP7_PROL:%.*]] = icmp sgt i32 [[TMP6_PROL]], 79 ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i32 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_NEXT]], [[XTRAITER]] -; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: outerH.prol.loopexit.unr-lcssa: ; CHECK-NEXT: [[TMP4_UNR_PH:%.*]] = phi i32 [ [[TMP6_PROL]], [[OUTERLATCH_PROL]] ] ; CHECK-NEXT: br label [[OUTERH_PROL_LOOPEXIT]] @@ -464,7 +570,7 @@ define void @test5() { ; CHECK-NEXT: [[C2_3:%.*]] = call i1 @unknown(i32 0) ; CHECK-NEXT: br i1 [[C2_3]], label [[INNERLATCH_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] ; CHECK: innerLatch.3: -; CHECK-NEXT: br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP6]] +; CHECK-NEXT: br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP9]] ; CHECK: outerLatch: ; CHECK-NEXT: br label [[INNERH_14:%.*]] ; CHECK: innerH.14: @@ -498,7 +604,7 @@ define void @test5() { ; CHECK-NEXT: [[C2_3_1:%.*]] = call i1 @unknown(i32 0) ; CHECK-NEXT: br i1 [[C2_3_1]], label [[INNERLATCH_3_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT19]] ; CHECK: innerLatch.3.1: -; CHECK-NEXT: br i1 
false, label [[INNERH_14]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP6]] +; CHECK-NEXT: br i1 false, label [[INNERH_14]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP9]] ; CHECK: outerLatch.1: ; CHECK-NEXT: br label [[INNERH_29:%.*]] ; CHECK: innerH.29: @@ -532,7 +638,7 @@ define void @test5() { ; CHECK-NEXT: [[C2_3_2:%.*]] = call i1 @unknown(i32 0) ; CHECK-NEXT: br i1 [[C2_3_2]], label [[INNERLATCH_3_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT21]] ; CHECK: innerLatch.3.2: -; CHECK-NEXT: br i1 false, label [[INNERH_29]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP6]] +; CHECK-NEXT: br i1 false, label [[INNERH_29]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP9]] ; CHECK: outerLatch.2: ; CHECK-NEXT: br label [[INNERH_314:%.*]] ; CHECK: innerH.314: @@ -566,11 +672,11 @@ define void @test5() { ; CHECK-NEXT: [[C2_3_3:%.*]] = call i1 @unknown(i32 0) ; CHECK-NEXT: br i1 [[C2_3_3]], label [[INNERLATCH_3_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT23]] ; CHECK: innerLatch.3.3: -; CHECK-NEXT: br i1 false, label [[INNERH_314]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP6]] +; CHECK-NEXT: br i1 false, label [[INNERH_314]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP9]] ; CHECK: outerLatch.3: ; CHECK-NEXT: [[TMP6_3]] = add i32 [[TMP4]], 4 ; CHECK-NEXT: [[TMP7_3:%.*]] = icmp sgt i32 [[TMP6_3]], 79 -; CHECK-NEXT: br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: outerLatchExit.loopexit.unr-lcssa: ; CHECK-NEXT: br label [[OUTERLATCHEXIT_LOOPEXIT]] ; CHECK: outerLatchExit.loopexit: @@ -676,7 +782,7 @@ define void @test6(i64 %start) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616 ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] -; CHECK-NEXT: br i1 
[[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: header.prol.loopexit.unr-lcssa: ; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ] ; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]] @@ -709,7 +815,7 @@ define void @test6(i64 %start) { ; CHECK: latch.3: ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616 -; CHECK-NEXT: br i1 [[TMP9]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: latchexit.unr-lcssa: ; CHECK-NEXT: br label [[LATCHEXIT]] ; CHECK: latchexit: diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-after-rotate.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-after-rotate.ll new file mode 100644 index 0000000000000..50afa08a2d7c9 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-after-rotate.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt --passes=loop-unroll -unroll-runtime=true -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s +target triple = "x86_64-unknown-linux-gnu" + +define void @test(i64 %0, ptr %1) #0 { +; CHECK-LABEL: define void @test( +; CHECK-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[B1:%.*]] = icmp eq i64 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]] +; CHECK: [[BODY_LR_PH]]: +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP5]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1 +; CHECK-NEXT: 
[[XTRAITER:%.*]] = and i64 [[TMP2]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]] +; CHECK: [[BODY_PROL_PREHEADER]]: +; CHECK-NEXT: br label %[[BODY_PROL:.*]] +; CHECK: [[BODY_PROL]]: +; CHECK-NEXT: [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ] +; CHECK-NEXT: [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1 +; CHECK-NEXT: [[D_PROL:%.*]] = load i32, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0 +; CHECK-NEXT: br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]] +; CHECK: [[HEADER_PROL]]: +; CHECK-NEXT: [[A_PROL]] = phi i64 [ [[C_PROL]], %[[BODY_PROL]] ] +; CHECK-NEXT: [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0 +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[BODY_PROL]], label %[[BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[BODY_PROL_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[A2_UNR_PH:%.*]] = phi i64 [ [[A_PROL]], %[[HEADER_PROL]] ] +; CHECK-NEXT: br label %[[BODY_PROL_LOOPEXIT]] +; CHECK: [[BODY_PROL_LOOPEXIT]]: +; CHECK-NEXT: [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], 7 +; CHECK-NEXT: br i1 [[TMP6]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]] +; CHECK: [[BODY_LR_PH_NEW]]: +; CHECK-NEXT: br label %[[BODY:.*]] +; CHECK: [[HEADER:.*]]: +; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT:.*]], label %[[HEADER_1:.*]] +; CHECK: [[HEADER_1]]: +; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]] +; 
CHECK: [[HEADER_2]]: +; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]] +; CHECK: [[HEADER_3]]: +; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_4:.*]] +; CHECK: [[HEADER_4]]: +; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_5:.*]] +; CHECK: [[HEADER_5]]: +; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_6:.*]] +; CHECK: [[HEADER_6]]: +; CHECK-NEXT: [[C_7:%.*]] = add i64 [[A2:%.*]], 8 +; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_7:.*]] +; CHECK: [[HEADER_7]]: +; CHECK-NEXT: [[B_7:%.*]] = icmp eq i64 [[C_7]], 0 +; CHECK-NEXT: br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]] +; CHECK: [[BODY]]: +; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_7]] ] +; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0 +; CHECK-NEXT: br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]] +; CHECK: [[END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[END:.*]] +; CHECK: [[END_LOOPEXIT3]]: +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; CHECK: [[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA]]: +; CHECK-NEXT: br label %[[HEADER_AFTER_CRIT_EDGE]] +; CHECK: [[HEADER_AFTER_CRIT_EDGE]]: +; CHECK-NEXT: br label %[[AFTER]] +; CHECK: [[AFTER]]: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: ret void +; +entry: + br label %header + +header: + %a = phi i64 [ %0, %entry ], [ %c, %body ] + %b = icmp eq i64 %a, 0 + br i1 %b, label %after, label %body + +body: + %c = add i64 %a, 1 + %d = load i32, ptr %1, align 4 + %e = icmp eq i32 %d, 0 + br i1 %e, label %end, label %header + +end: + ret void + +after: + call void @foo() + ret void +} + +declare void @foo() + +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"} +;. \ No newline at end of file