diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8ea850af7a69b..5c5c6ec96cfc7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7933,6 +7933,67 @@ The attributes in this metadata is added to all followup loops of the loop distribution pass. See :ref:`Transformation Metadata ` for details. +'``llvm.loop.estimated_trip_count``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata records an estimated trip count for the loop. The first operand +is the string ``llvm.loop.estimated_trip_count``. The second operand is an +integer specifying the count, which might be omitted for the reasons described +below. For example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.estimated_trip_count", i32 8} + !1 = !{!"llvm.loop.estimated_trip_count"} + +Purpose +""""""" + +A loop's estimated trip count is an estimate of the average number of loop +iterations (specifically, the number of times the loop's header executes) each +time execution reaches the loop. It is usually only an estimate based on, for +example, profile data. The actual number of iterations might vary widely. + +The estimated trip count serves as a parameter for various loop transformations +and typically helps estimate transformation cost. For example, it can help +determine how many iterations to peel or how aggressively to unroll. + +Initialization and Maintenance +"""""""""""""""""""""""""""""" + +The ``pgo-estimate-trip-counts`` pass typically runs immediately after profile +ingestion to add this metadata to all loops. It estimates each loop's trip +count from the loop's ``branch_weights`` metadata. This way of initially +estimating trip counts appears to be useful for the passes that consume them. + +As passes transform existing loops and create new loops, they must be free to +update and create ``branch_weights`` metadata to maintain accurate block +frequencies. 
Trip counts estimated from this new ``branch_weights`` metadata +are not necessarily useful to the passes that consume them. In general, when +passes transform and create loops, they should separately estimate new trip +counts from previously estimated trip counts, and they should record them by +creating or updating this metadata. For this or any other work involving +estimated trip counts, passes should always call +``llvm::getLoopEstimatedTripCount`` and ``llvm::setLoopEstimatedTripCount``. + +Missing Metadata and Values +""""""""""""""""""""""""""" + +If the current implementation of ``pgo-estimate-trip-counts`` cannot estimate a +trip count from the loop's ``branch_weights`` metadata due to the loop's form or +due to missing profile data, it creates this metadata for the loop but omits the +value. This situation is currently common (e.g., the LLVM IR loop that Clang +emits for a simple C ``for`` loop). A later pass (e.g., ``loop-rotate``) might +modify the loop's form in a way that enables estimating its trip count even if +those modifications provably never impact the actual number of loop iterations. +That later pass should then add an appropriate value to the metadata. + +However, not all such passes currently do so. Thus, if this metadata has no +value, ``llvm::getLoopEstimatedTripCount`` will disregard it and estimate the +trip count from the loop's ``branch_weights`` metadata. It does the same when +the metadata is missing altogether, perhaps because ``pgo-estimate-trip-counts`` +was not specified in a minimal pass list to a tool like ``opt``. 
+ '``llvm.licm.disable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index a7a6a2753709c..a06be573b5e01 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -637,9 +637,13 @@ LLVM_ABI std::optional getOptionalBoolLoopAttribute(const Loop *TheLoop, /// Returns true if Name is applied to TheLoop and enabled. LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); -/// Find named metadata for a loop with an integer value. -LLVM_ABI std::optional getOptionalIntLoopAttribute(const Loop *TheLoop, - StringRef Name); +/// Find named metadata for a loop with an integer value. Return +/// \c std::nullopt if the metadata has no value or is missing altogether. If +/// \p Missing, set \c *Missing to indicate whether the metadata is missing +/// altogether. +LLVM_ABI std::optional +getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name, + bool *Missing = nullptr); /// Find named metadata for a loop with an integer value. Return \p Default if /// not set. diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOEstimateTripCounts.h b/llvm/include/llvm/Transforms/Instrumentation/PGOEstimateTripCounts.h new file mode 100644 index 0000000000000..1b35c1c77e5c3 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOEstimateTripCounts.h @@ -0,0 +1,24 @@ +//===- PGOEstimateTripCounts.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOESTIMATETRIPCOUNTS_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOESTIMATETRIPCOUNTS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct PGOEstimateTripCountsPass + : public PassInfoMixin { + PGOEstimateTripCountsPass() {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_INSTRUMENTATION_PGOESTIMATETRIPCOUNTS_H diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index e4d2f9d191707..7d03fb0d81e4c 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -316,28 +316,73 @@ LLVM_ABI TransformationMode hasDistributeTransformation(const Loop *L); LLVM_ABI TransformationMode hasLICMVersioningTransformation(const Loop *L); /// @} -/// Set input string into loop metadata by keeping other values intact. -/// If the string is already in loop metadata update value if it is -/// different. -LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, - unsigned V = 0); - -/// Returns a loop's estimated trip count based on branch weight metadata. -/// In addition if \p EstimatedLoopInvocationWeight is not null it is -/// initialized with weight of loop's latch leading to the exit. -/// Returns a valid positive trip count, saturated at UINT_MAX, or std::nullopt -/// when a meaningful estimate cannot be made. +/// Set the string \p MDString into the loop metadata of \p TheLoop while +/// keeping other loop metadata intact. Set \p *V as its value, or set it +/// without a value if \p V is \c std::nullopt to indicate the value is unknown. +/// If \p MDString is already in the loop metadata, update it if its value (or +/// lack of value) is different. 
Return true if metadata was changed. +LLVM_ABI bool addStringMetadataToLoop(Loop *TheLoop, const char *MDString, + std::optional V = 0); + +/// Return either: +/// - The value of \c llvm.loop.estimated_trip_count from the loop metadata of +/// \p L, if that metadata is present and has a value. +/// - Else, a new estimate of the trip count from the latch branch weights of +/// \p L, if the estimation's implementation is able to handle the loop form +/// of \p L (e.g., \p L must have a latch block that controls the loop exit). +/// - Else, \c std::nullopt. +/// +/// An estimated trip count is always a valid positive trip count, saturated at +/// \c UINT_MAX. +/// +/// Via \c LLVM_DEBUG, emit diagnostics that include "WARNING" when the metadata +/// is in an unexpected state as that indicates some transformation has +/// corrupted it. If \p DbgForInit, expect the metadata to be missing. +/// Otherwise, expect the metadata to be present, and expect it to have no value +/// only if the trip count is currently inestimable from the latch branch +/// weights. +/// +/// In addition, if \p EstimatedLoopInvocationWeight, then either: +/// - Set \p *EstimatedLoopInvocationWeight to the weight of the latch's branch +/// to the loop exit. +/// - Do not set it and return \c std::nullopt if the current implementation +/// cannot compute that weight (e.g., if \p L does not have a latch block that +/// controls the loop exit) or the weight is zero (because zero cannot be +/// used to compute new branch weights that reflect the estimated trip count). +/// +/// TODO: Eventually, once all passes have migrated away from setting branch +/// weights to indicate estimated trip counts, this function will drop the +/// \p EstimatedLoopInvocationWeight parameter. 
LLVM_ABI std::optional getLoopEstimatedTripCount(Loop *L, - unsigned *EstimatedLoopInvocationWeight = nullptr); - -/// Set a loop's branch weight metadata to reflect that loop has \p -/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits -/// through latch. Returns true if metadata is successfully updated, false -/// otherwise. Note that loop must have a latch block which controls loop exit -/// in order to succeed. -LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, - unsigned EstimatedLoopInvocationWeight); + unsigned *EstimatedLoopInvocationWeight = nullptr, + bool DbgForInit = false); + +/// Set \c llvm.loop.estimated_trip_count with the value \c *EstimatedTripCount +/// in the loop metadata of \p L, or set it without a value if +/// \c !EstimatedTripCount to indicate that \c getLoopEstimatedTripCount cannot +/// estimate the trip count from latch branch weights. If +/// \c !EstimatedTripCount but \c getLoopEstimatedTripCount can estimate the +/// trip count, future calls to \c getLoopEstimatedTripCount will diagnose the +/// metadata as corrupt. +/// +/// In addition, if \p EstimatedLoopInvocationWeight, set the branch weight +/// metadata of \p L to reflect that \p L has an estimated +/// \c *EstimatedTripCount iterations and has \c *EstimatedLoopInvocationWeight +/// exit weight through the loop's latch. +/// +/// Return false if \c llvm.loop.estimated_trip_count was already set according +/// to \p EstimatedTripCount and so was not updated. Return false if +/// \p EstimatedLoopInvocationWeight and if branch weight metadata could not be +/// successfully updated (e.g., if \p L does not have a latch block that +/// controls the loop exit). Otherwise, return true. +/// +/// TODO: Eventually, once all passes have migrated away from setting branch +/// weights to indicate estimated trip counts, this function will drop the +/// \p EstimatedLoopInvocationWeight parameter.
+LLVM_ABI bool setLoopEstimatedTripCount( + Loop *L, std::optional EstimatedTripCount, + std::optional EstimatedLoopInvocationWeight = std::nullopt); /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. If the loop has no parent, this is trivially diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 901cfe03ecd33..ba2c30b3c4764 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -1112,9 +1112,13 @@ bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { } std::optional llvm::getOptionalIntLoopAttribute(const Loop *TheLoop, - StringRef Name) { - const MDOperand *AttrMD = - findStringMetadataForLoop(TheLoop, Name).value_or(nullptr); + StringRef Name, + bool *Missing) { + std::optional AttrMDOpt = + findStringMetadataForLoop(TheLoop, Name); + if (Missing) + *Missing = !AttrMDOpt; + const MDOperand *AttrMD = AttrMDOpt.value_or(nullptr); if (!AttrMD) return std::nullopt; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 572e5f19a1972..f593c5bba7573 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -248,6 +248,7 @@ #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" +#include "llvm/Transforms/Instrumentation/PGOEstimateTripCounts.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/RealtimeSanitizer.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 98821bb1408a7..fc0d88e710426 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -80,6 +80,7 @@ #include 
"llvm/Transforms/Instrumentation/MemProfUse.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" +#include "llvm/Transforms/Instrumentation/PGOEstimateTripCounts.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Scalar/ADCE.h" @@ -1268,8 +1269,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse || - PGOOpt->Action == PGOOptions::SampleUse)) + PGOOpt->Action == PGOOptions::SampleUse)) { MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); + // TODO: Is this the right place for this pass? Should we enable it in any + // other case, such as when __builtin_expect_with_probability or + // __builtin_expect appears in the source code but profiles are not read? + MPM.addPass(PGOEstimateTripCountsPass()); + } MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); @@ -2355,4 +2361,4 @@ AAManager PassBuilder::buildDefaultAAPipeline() { bool PassBuilder::isInstrumentedPGOUse() const { return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) || !UseCtxProfile.empty(); -} \ No newline at end of file +} diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 96250772da4a0..6e7ac959f57f4 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -124,6 +124,7 @@ MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)) MODULE_PASS("partial-inliner", PartialInlinerPass()) +MODULE_PASS("pgo-estimate-trip-counts", PGOEstimateTripCountsPass()) MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion()) MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen()) MODULE_PASS("pgo-instr-use", PGOInstrumentationUse()) diff 
--git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 15fd421a41b0f..0a97ed4b51e69 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_component_library(LLVMInstrumentation LowerAllowCheckPass.cpp PGOCtxProfFlattening.cpp PGOCtxProfLowering.cpp + PGOEstimateTripCounts.cpp PGOForceFunctionAttrs.cpp PGOInstrumentation.cpp PGOMemOPSizeOpt.cpp diff --git a/llvm/lib/Transforms/Instrumentation/PGOEstimateTripCounts.cpp b/llvm/lib/Transforms/Instrumentation/PGOEstimateTripCounts.cpp new file mode 100644 index 0000000000000..762aca0b897ce --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/PGOEstimateTripCounts.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/PGOEstimateTripCounts.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "pgo-estimate-trip-counts" + +/// Record an estimated trip count in the loop metadata of \p L and, +/// recursively, of each of its subloops. Return true if any metadata changed. +static bool runOnLoop(Loop *L) { + bool MadeChange = false; + std::optional<unsigned> TC = getLoopEstimatedTripCount( + L, /*EstimatedLoopInvocationWeight=*/nullptr, /*DbgForInit=*/true); + MadeChange |= setLoopEstimatedTripCount(L, TC); + for (Loop *SL : *L) + MadeChange |= runOnLoop(SL); + return MadeChange; +} + +/// Run \c runOnLoop on every top-level loop of every function definition in +/// \p M. Preserve all analyses only if no loop metadata was changed. +PreservedAnalyses PGOEstimateTripCountsPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + bool MadeChange = false; + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": start\n"); + for (Function &F : M) { + if (F.isDeclaration()) + continue; + LoopInfo &LI = FAM.getResult<LoopAnalysis>(F); + for (Loop *L : LI) + MadeChange |= runOnLoop(L); + } + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": end\n"); + return MadeChange ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 200d1fb854155..50530590cf368 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -54,6 +54,8 @@ using namespace llvm::PatternMatch; static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; +static const char *LLVMLoopEstimatedTripCount = + "llvm.loop.estimated_trip_count"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, @@ -201,34 +203,40 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { } /// Create MDNode for input string.
-static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { +static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, + std::optional V) { LLVMContext &Context = TheLoop->getHeader()->getContext(); - Metadata *MDs[] = { - MDString::get(Context, Name), - ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; - return MDNode::get(Context, MDs); + if (V) { + Metadata *MDs[] = {MDString::get(Context, Name), + ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Context), *V))}; + return MDNode::get(Context, MDs); + } + return MDNode::get(Context, {MDString::get(Context, Name)}); } -/// Set input string into loop metadata by keeping other values intact. -/// If the string is already in loop metadata update value if it is -/// different. -void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, - unsigned V) { +bool llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, + std::optional V) { SmallVector MDs(1); // If the loop already has metadata, retain it. MDNode *LoopID = TheLoop->getLoopID(); if (LoopID) { for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { MDNode *Node = cast(LoopID->getOperand(i)); - // If it is of form key = value, try to parse it. - if (Node->getNumOperands() == 2) { + // If it is of form key [= value], try to parse it. + unsigned NumOps = Node->getNumOperands(); + if (NumOps == 1 || NumOps == 2) { MDString *S = dyn_cast(Node->getOperand(0)); if (S && S->getString() == StringMD) { - ConstantInt *IntMD = - mdconst::extract_or_null(Node->getOperand(1)); - if (IntMD && IntMD->getSExtValue() == V) - // It is already in place. Do nothing. - return; + // If it is already in place, do nothing. 
+ if (NumOps == 2 && V) { + ConstantInt *IntMD = + mdconst::extract_or_null(Node->getOperand(1)); + if (IntMD && IntMD->getSExtValue() == *V) + return false; + } else if (NumOps == 1 && !V) { + return false; + } // We need to update the value, so just skip it here and it will // be added after copying other existed nodes. continue; @@ -245,6 +253,7 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); TheLoop->setLoopID(NewLoopID); + return true; } std::optional @@ -804,26 +813,48 @@ static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) { return LatchBR; } -/// Return the estimated trip count for any exiting branch which dominates -/// the loop latch. -static std::optional getEstimatedTripCount(BranchInst *ExitingBranch, - Loop *L, - uint64_t &OrigExitWeight) { +struct DbgLoop { + const Loop *L; + explicit DbgLoop(const Loop *L) : L(L) {} +}; +static inline raw_ostream &operator<<(raw_ostream &OS, DbgLoop D) { + OS << "function "; + D.L->getHeader()->getParent()->printAsOperand(OS, /*PrintType=*/false); + return OS << " " << *D.L; +} + +static std::optional estimateLoopTripCount(Loop *L) { + // Currently we take the estimate exit count only from the loop latch, + // ignoring other exiting blocks. This can overestimate the trip count + // if we exit through another exit, but can never underestimate it. + // TODO: incorporate information from other exits + BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L); + if (!ExitingBranch) { + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to find exiting " + << "latch branch of required form in " << DbgLoop(L) + << "\n"); + return std::nullopt; + } + // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. 
uint64_t LoopWeight, ExitWeight; - if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) + if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) { + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to extract branch " + << "weights for " << DbgLoop(L) << "\n"); return std::nullopt; + } if (L->contains(ExitingBranch->getSuccessor(1))) std::swap(LoopWeight, ExitWeight); - if (!ExitWeight) + if (!ExitWeight) { // Don't have a way to return predicated infinite + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed because of zero exit " + << "probability for " << DbgLoop(L) << "\n"); return std::nullopt; - - OrigExitWeight = ExitWeight; + } // Estimated exit count is a ratio of the loop weight by the weight of the // edge exiting the loop, rounded to nearest. @@ -834,33 +865,86 @@ static std::optional getEstimatedTripCount(BranchInst *ExitingBranch, return std::numeric_limits::max(); // Estimated trip count is one plus estimated exit count. - return ExitCount + 1; + uint64_t TC = ExitCount + 1; + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: estimated trip count of " << TC + << " for " << DbgLoop(L) << "\n"); + return TC; } -std::optional -llvm::getLoopEstimatedTripCount(Loop *L, - unsigned *EstimatedLoopInvocationWeight) { - // Currently we take the estimate exit count only from the loop latch, - // ignoring other exiting blocks. This can overestimate the trip count - // if we exit through another exit, but can never underestimate it. 
- // TODO: incorporate information from other exits - if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) { - uint64_t ExitWeight; - if (std::optional EstTripCount = - getEstimatedTripCount(LatchBranch, L, ExitWeight)) { - if (EstimatedLoopInvocationWeight) - *EstimatedLoopInvocationWeight = ExitWeight; - return *EstTripCount; +std::optional<unsigned> llvm::getLoopEstimatedTripCount( + Loop *L, unsigned *EstimatedLoopInvocationWeight, bool DbgForInit) { + // If requested, either compute *EstimatedLoopInvocationWeight or return + // nullopt if cannot (no suitable latch branch, no weights, or zero weight). + // + // TODO: Eventually, once all passes have migrated away from setting branch + // weights to indicate estimated trip counts, this function will drop the + // EstimatedLoopInvocationWeight parameter. + if (EstimatedLoopInvocationWeight) { + BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L); + uint64_t LoopWeight, ExitWeight; + if (!ExitingBranch || + !extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) + return std::nullopt; + if (L->contains(ExitingBranch->getSuccessor(1))) + std::swap(LoopWeight, ExitWeight); + if (!ExitWeight) + return std::nullopt; + *EstimatedLoopInvocationWeight = ExitWeight; - } } - return std::nullopt; + + // Return the estimated trip count from metadata unless the metadata is + // missing or has no value. + bool Missing; + if (auto TC = getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount, + &Missing)) { + LLVM_DEBUG(dbgs() << "getLoopEstimatedTripCount: " + << LLVMLoopEstimatedTripCount << " metadata has trip " + << "count of " << *TC << " for " << DbgLoop(L) << "\n"); + return TC; + } + + // Estimate the trip count from latch branch weights. + std::optional<unsigned> TC = estimateLoopTripCount(L); + if (DbgForInit) { + // We expect no existing metadata as we are responsible for creating it. + LLVM_DEBUG(dbgs() << (Missing ? 
"" : "WARNING: ") + << "getLoopEstimatedTripCount: " + << LLVMLoopEstimatedTripCount << " metadata " + << (Missing ? 
"" : "not ") << "missing as expected " + << "during its init for " << DbgLoop(L) << "\n"); + } else if (Missing) { + // We expect that metadata was already created. + LLVM_DEBUG(dbgs() << "WARNING: getLoopEstimatedTripCount: " + << LLVMLoopEstimatedTripCount << " metadata missing for " + << DbgLoop(L) << "\n"); + } else { + // If the trip count is estimable, the value should have been added already. + LLVM_DEBUG(dbgs() << (TC ? "WARNING: " : "") + << "getLoopEstimatedTripCount: " + << LLVMLoopEstimatedTripCount << " metadata " + << (TC ? "incorrectly " : "correctly ") + << "indicates trip count is inestimable for " + << DbgLoop(L) << "\n"); + } + return TC; } -bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, - unsigned EstimatedloopInvocationWeight) { - // At the moment, we currently support changing the estimate trip count of - // the latch branch only. We could extend this API to manipulate estimated - // trip counts for any exit. +bool llvm::setLoopEstimatedTripCount( + Loop *L, std::optional<unsigned> EstimatedTripCount, + std::optional<unsigned> EstimatedloopInvocationWeight) { + // Set the metadata. + bool Updated = addStringMetadataToLoop(L, LLVMLoopEstimatedTripCount, + EstimatedTripCount); + if (!EstimatedTripCount || !EstimatedloopInvocationWeight) + return Updated; + + // At the moment, we currently support changing the estimated trip count in + // the latch branch's branch weights only. We could extend this API to + // manipulate estimated trip counts for any exit. + // + // TODO: Eventually, once all passes have migrated away from setting branch + // weights to indicate estimated trip counts, we will not set branch weights + // here at all.
BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); if (!LatchBranch) return false; @@ -869,9 +953,9 @@ bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned LatchExitWeight = 0; unsigned BackedgeTakenWeight = 0; - if (EstimatedTripCount > 0) { - LatchExitWeight = EstimatedloopInvocationWeight; - BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight; + if (*EstimatedTripCount > 0) { + LatchExitWeight = *EstimatedloopInvocationWeight; + BackedgeTakenWeight = (*EstimatedTripCount - 1) * LatchExitWeight; } // Make a swap if back edge is taken when condition is "false". @@ -885,7 +969,7 @@ bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, LLVMContext::MD_prof, MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight)); - return true; + return Updated; } bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 38b7890682783..516f1bf972429 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -59,38 +59,64 @@ ; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo - ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass +; CHECK-O-NEXT: Running pass: PGOEstimateTripCountsPass +; CHECK-O-NEXT: Invalidating analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Invalidating analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; 
CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager -; CHECK-O-NEXT: Invalidating analysis: AAManager ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on foo ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Running pass: InlinerPass +; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo +; CHECK-O-NEXT: Running analysis: AssumptionAnalysis on foo +; CHECK-O-NEXT: Running analysis: TargetIRAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-O-NEXT: Running analysis: AAManager +; CHECK-O23SZ-NEXT: Running analysis: BasicAA on foo +; CHECK-O23SZ-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O23SZ-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O1-NEXT: Running analysis: BasicAA on foo +; CHECK-O1-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O1-NEXT: Running analysis: TypeBasedAA on foo +; 
CHECK-O1-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running analysis: LastRunTrackingAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: BlockFrequencyAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O23SZ-NEXT: Running pass: AggressiveInstCombinePass +; CHECK-O1-NEXT: Running analysis: LastRunTrackingAnalysis on foo +; CHECK-O1-NEXT: Running analysis: BlockFrequencyAnalysis on foo +; CHECK-O1-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O1-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass @@ -155,6 +181,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyTypeTestsPass ; CHECK-O-NEXT: Running pass: CoroCleanupPass ; CHECK-O-NEXT: Running pass: GlobalOptPass +; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index f6a9406596803..e26e1eafbfb65 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -86,7 +86,12 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass +; CHECK-O-NEXT: Running pass: PGOEstimateTripCountsPass +; 
CHECK-O-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo +; CHECK-O-NEXT: Invalidating analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA @@ -101,6 +106,7 @@ ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Running pass: InlinerPass +; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 48a9433d24999..0f8e1c770a4dc 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -66,28 +66,41 @@ ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass +; CHECK-O-NEXT: Running pass: PGOEstimateTripCountsPass +; CHECK-O-NEXT: Invalidating analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Invalidating analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; 
CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager -; CHECK-O-NEXT: Invalidating analysis: AAManager ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis on [module] +; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on foo ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Running pass: InlinerPass +; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo +; CHECK-O-NEXT: Running analysis: AssumptionAnalysis on foo +; CHECK-O-NEXT: Running analysis: TargetIRAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-O-NEXT: Running analysis: AAManager +; CHECK-O-NEXT: Running analysis: BasicAA on foo +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -95,7 +108,17 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running analysis: LastRunTrackingAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: BlockFrequencyAnalysis on foo +; 
CHECK-O23SZ-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O23SZ-NEXT: Running pass: AggressiveInstCombinePass +; CHECK-O1-NEXT: Running analysis: LastRunTrackingAnalysis on foo +; CHECK-O1-NEXT: Running analysis: BlockFrequencyAnalysis on foo +; CHECK-O1-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O1-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass @@ -155,6 +178,7 @@ ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass +; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-O-NEXT: Running pass: CanonicalizeAliasesPass diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll index 1f619898ea788..8a782e827701d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll @@ -20,11 +20,11 @@ define void @_Z3foov() { ; CHECK-V1-IC1: [[VECTOR_BODY]]: ; CHECK-V1-IC1: br i1 [[TMP10:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF4:![0-9]+]] +; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]] ; CHECK-V1-IC1: [[SCALAR_PH]]: ; CHECK-V1-IC1: br 
label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @_Z3foov( @@ -36,11 +36,11 @@ define void @_Z3foov() { ; CHECK-V2-IC1: [[VECTOR_BODY]]: ; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-V2-IC1: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF6:![0-9]+]] ; CHECK-V2-IC1: [[SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @_Z3foov( @@ -54,19 +54,19 @@ define void @_Z3foov() { ; CHECK-V2-IC4: [[VECTOR_BODY]]: ; CHECK-V2-IC4: br i1 [[TMP12:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]] +; CHECK-V2-IC4: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF6:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label 
%[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP23:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP23:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF10:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -89,31 +89,37 @@ for.cond.cleanup: ; preds = %for.body !0 = !{!"branch_weights", i32 1, i32 1023} ;. 
; CHECK-V1-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} -; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK-V1-IC1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V1-IC1: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} +; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V1-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V1-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} ;. ; CHECK-V2-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC1: [[PROF1]] = !{!"branch_weights", i32 1, i32 255} -; CHECK-V2-IC1: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK-V2-IC1: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK-V2-IC1: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V2-IC1: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V2-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} +; CHECK-V2-IC1: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 256} +; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V2-IC1: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META4]], [[META3]]} +; CHECK-V2-IC1: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} ;. 
; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63} -; CHECK-V2-IC4: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK-V2-IC4: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK-V2-IC4: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V2-IC4: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V2-IC4: [[PROF5]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 2, i32 0} -; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 1} -; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]} +; CHECK-V2-IC4: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 64} +; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V2-IC4: [[PROF7]] = !{!"branch_weights", i32 2, i32 0} +; CHECK-V2-IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V2-IC4: [[PROF10]] = !{!"branch_weights", i32 1, i32 1} +; CHECK-V2-IC4: [[PROF11]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META4]], [[META3]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 08adfdd4793eb..8993032fc6d72 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -47,7 +47,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 ; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF6:![0-9]+]] ; CHECK: bb18: ; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]] @@ -56,7 +56,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 -; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: bb6: ; CHECK-NEXT: ret void ; @@ -97,10 +97,12 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ;. 
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23} -; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} +; CHECK: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 24} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META4]], [[META3]]} +; CHECK: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} ;. diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index 6892709f085f7..08bc920cef0e0 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -34,23 +34,23 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]: ; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] -; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]: ; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 -; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label 
%[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]: ; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] -; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC1_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC1_EPI4: [[LOOP]]: ; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF13:![0-9]+]], !llvm.loop [[LOOP14:![0-9]+]] ; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC1_EPI4: br label %[[EXIT]] ; MAINVF4IC1_EPI4: [[EXIT]]: @@ -77,23 +77,23 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], 
[[N_VEC]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]: ; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 -; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]: ; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] -; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC2_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC2_EPI4: [[LOOP]]: ; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label 
%[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]] ; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC2_EPI4: br label %[[EXIT]] ; MAINVF4IC2_EPI4: [[EXIT]]: @@ -127,28 +127,34 @@ exit: ; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} ; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} ; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307} -; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} ; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} -; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0} -; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} -; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]} +; MAINVF4IC1_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 308} +; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 0} +; MAINVF4IC1_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META5]]} +; MAINVF4IC1_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. 
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} ; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} ; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} ; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153} -; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} ; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} -; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7} -; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0} -; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} -; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]} +; MAINVF4IC2_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 154} +; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7} +; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 0} +; MAINVF4IC2_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META5]]} +; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. 
diff --git a/llvm/test/Transforms/PGOProfile/pgo-estimate-trip-counts.ll b/llvm/test/Transforms/PGOProfile/pgo-estimate-trip-counts.ll new file mode 100644 index 0000000000000..f3fe8fb373694 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/pgo-estimate-trip-counts.ll @@ -0,0 +1,240 @@ +; Check the pgo-estimate-trip-counts pass. Indirectly check +; llvm::getLoopEstimatedTripCount and llvm::setLoopEstimatedTripCount. + +; RUN: opt %s -S -passes=pgo-estimate-trip-counts 2>&1 | \ +; RUN: FileCheck %s -implicit-check-not='{{^[^ ;]*:}}' + +; No metadata and trip count is estimable: create metadata with value. +; +; CHECK-LABEL: define void @estimable(i32 %n) { +define void @estimable(i32 %n) { +; CHECK: entry: +entry: + br label %body + +; CHECK: body: +body: + %i = phi i32 [ 0, %entry ], [ %inc, %body ] + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, %n + ; CHECK: br i1 %cmp, label %body, label %exit, !prof !0, !llvm.loop ![[#ESTIMABLE:]] + br i1 %cmp, label %body, label %exit, !prof !0 + +; CHECK: exit: +exit: + ret void +} + +; No metadata and trip count is inestimable because no branch weights: create +; metadata with no value. +; +; CHECK-LABEL: define void @no_branch_weights(i32 %n) { +define void @no_branch_weights(i32 %n) { +; CHECK: entry: +entry: + br label %body + +; CHECK: body: +body: + %i = phi i32 [ 0, %entry ], [ %inc, %body ] + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, %n + ; CHECK: br i1 %cmp, label %body, label %exit, !llvm.loop ![[#NO_BRANCH_WEIGHTS:]] + br i1 %cmp, label %body, label %exit + +; CHECK: exit: +exit: + ret void +} + +; No metadata and trip count is inestimable because multiple latches: create +; metadata with no value.
+; +; CHECK-LABEL: define void @multi_latch(i32 %n, i1 %c) { +define void @multi_latch(i32 %n, i1 %c) { +; CHECK: entry: +entry: + br label %head + +; CHECK: head: +head: + %i = phi i32 [ 0, %entry ], [ %inc, %latch0], [ %inc, %latch1 ] + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, %n + ; CHECK: br i1 %cmp, label %latch0, label %exit, !prof !0 + br i1 %cmp, label %latch0, label %exit, !prof !0 + +; CHECK: latch0: +latch0: + ; CHECK: br i1 %c, label %head, label %latch1, !prof !0, !llvm.loop ![[#MULTI_LATCH:]] + br i1 %c, label %head, label %latch1, !prof !0 + +; CHECK: latch1: +latch1: + ; CHECK: br label %head + br label %head + +; CHECK: exit: +exit: + ret void +} + +; Metadata is already present with value, and trip count is estimable: keep the +; existing metadata value. +; +; CHECK-LABEL: define void @val_estimable(i32 %n) { +define void @val_estimable(i32 %n) { +; CHECK: entry: +entry: + br label %body + +; CHECK: body: +body: + %i = phi i32 [ 0, %entry ], [ %inc, %body ] + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, %n + ; CHECK: br i1 %cmp, label %body, label %exit, !prof !0, !llvm.loop ![[#VAL_ESTIMABLE:]] + br i1 %cmp, label %body, label %exit, !prof !0, !llvm.loop !1 + +; CHECK: exit: +exit: + ret void +} + +; Metadata is already present with value, and trip count is inestimable: keep +; the existing metadata value. +; +; CHECK-LABEL: define void @val_inestimable(i32 %n) { +define void @val_inestimable(i32 %n) { +; CHECK: entry: +entry: + br label %body + +; CHECK: body: +body: + %i = phi i32 [ 0, %entry ], [ %inc, %body ] + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, %n + ; CHECK: br i1 %cmp, label %body, label %exit, !llvm.loop ![[#VAL_INESTIMABLE:]] + br i1 %cmp, label %body, label %exit, !llvm.loop !3 + +; CHECK: exit: +exit: + ret void +} + +; Metadata is already present without value, and trip count is estimable: add +; new value to metadata.
+; +; CHECK-LABEL: define void @no_val_estimable(i32 %n) { +define void @no_val_estimable(i32 %n) { +; CHECK: entry: +entry: + br label %body + +; CHECK: body: +body: + %i = phi i32 [ 0, %entry ], [ %inc, %body ] + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, %n + ; CHECK: br i1 %cmp, label %body, label %exit, !prof !0, !llvm.loop ![[#NO_VAL_ESTIMABLE:]] + br i1 %cmp, label %body, label %exit, !prof !0, !llvm.loop !5 + +; CHECK: exit: +exit: + ret void +} + +; Metadata is already present without value, and trip count is inestimable: +; leave no value on metadata. +; +; CHECK-LABEL: define void @no_val_inestimable(i32 %n) { +define void @no_val_inestimable(i32 %n) { +; CHECK: entry: +entry: + br label %body + +; CHECK: body: +body: + %i = phi i32 [ 0, %entry ], [ %inc, %body ] + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, %n + ; CHECK: br i1 %cmp, label %body, label %exit, !llvm.loop ![[#NO_VAL_INESTIMABLE:]] + br i1 %cmp, label %body, label %exit, !llvm.loop !7 + +; CHECK: exit: +exit: + ret void +} + +; Check that nested loops are visited.
+; +; CHECK-LABEL: define void @nested(i32 %n) { +define void @nested(i32 %n) { +; CHECK: entry: +entry: + br label %loop0.head + +; CHECK: loop0.head: +loop0.head: + %loop0.i = phi i32 [ 0, %entry ], [ %loop0.inc, %loop0.latch ] + br label %loop1.head + +; CHECK: loop1.head: +loop1.head: + %loop1.i = phi i32 [ 0, %loop0.head ], [ %loop1.inc, %loop1.latch ] + br label %loop2 + +; CHECK: loop2: +loop2: + %loop2.i = phi i32 [ 0, %loop1.head ], [ %loop2.inc, %loop2 ] + %loop2.inc = add nsw i32 %loop2.i, 1 + %loop2.cmp = icmp slt i32 %loop2.inc, %n + ; CHECK: br i1 %loop2.cmp, label %loop2, label %loop1.latch, !prof !0, !llvm.loop ![[#NESTED_LOOP2:]] + br i1 %loop2.cmp, label %loop2, label %loop1.latch, !prof !0 + +; CHECK: loop1.latch: +loop1.latch: + %loop1.inc = add nsw i32 %loop1.i, 1 + %loop1.cmp = icmp slt i32 %loop1.inc, %n + ; CHECK: br i1 %loop1.cmp, label %loop1.head, label %loop0.latch, !prof !0, !llvm.loop ![[#NESTED_LOOP1:]] + br i1 %loop1.cmp, label %loop1.head, label %loop0.latch, !prof !0 + +; CHECK: loop0.latch: +loop0.latch: + %loop0.inc = add nsw i32 %loop0.i, 1 + %loop0.cmp = icmp slt i32 %loop0.inc, %n + ; CHECK: br i1 %loop0.cmp, label %loop0.head, label %exit, !prof !0, !llvm.loop ![[#NESTED_LOOP0:]] + br i1 %loop0.cmp, label %loop0.head, label %exit, !prof !0 + +; CHECK: exit: +exit: + ret void +} + +; CHECK: !0 = !{!"branch_weights", i32 9, i32 1} +; Each trip-count node ID is captured at its first use and only referenced afterward. +; CHECK: ![[#ESTIMABLE]] = distinct !{![[#ESTIMABLE]], ![[#ESTIMABLE_TC:]]} +; CHECK: ![[#ESTIMABLE_TC]] = !{!"llvm.loop.estimated_trip_count", i32 10} +; +; CHECK: ![[#NO_BRANCH_WEIGHTS]] = distinct !{![[#NO_BRANCH_WEIGHTS]], ![[#INESTIMABLE_TC:]]} +; CHECK: ![[#INESTIMABLE_TC]] = !{!"llvm.loop.estimated_trip_count"} +; CHECK: ![[#MULTI_LATCH]] = distinct !{![[#MULTI_LATCH]], ![[#INESTIMABLE_TC]]} +; +; CHECK: ![[#VAL_ESTIMABLE]] = distinct !{![[#VAL_ESTIMABLE]], ![[#VAL_TC:]]} +; CHECK: ![[#VAL_TC]] = !{!"llvm.loop.estimated_trip_count", i32 5} +; CHECK: ![[#VAL_INESTIMABLE]] = distinct
!{![[#VAL_INESTIMABLE]], ![[#VAL_TC]]} +; +; CHECK: ![[#NO_VAL_ESTIMABLE]] = distinct !{![[#NO_VAL_ESTIMABLE]], ![[#ESTIMABLE_TC]]} +; CHECK: ![[#NO_VAL_INESTIMABLE]] = distinct !{![[#NO_VAL_INESTIMABLE]], ![[#INESTIMABLE_TC]]} +; +; CHECK: ![[#NESTED_LOOP2]] = distinct !{![[#NESTED_LOOP2]], ![[#ESTIMABLE_TC]]} +; CHECK: ![[#NESTED_LOOP1]] = distinct !{![[#NESTED_LOOP1]], ![[#ESTIMABLE_TC]]} +; CHECK: ![[#NESTED_LOOP0]] = distinct !{![[#NESTED_LOOP0]], ![[#ESTIMABLE_TC]]} +!0 = !{!"branch_weights", i32 9, i32 1} +!1 = distinct !{!1, !2} +!2 = !{!"llvm.loop.estimated_trip_count", i32 5} +!3 = distinct !{!3, !2} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.estimated_trip_count"} +!7 = distinct !{!7, !6}