llvm · huntergr-arm · Jul 30, 2025 · Aug 12, 2025 · david-arm · Aug 11, 2025
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -692,6 +692,37 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
   return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
 }
 
+/// Matcher that accepts any VPValue which is defined outside of all loop
+/// regions. Anything that is not a VPValue is rejected.
+struct loop_invariant_vpvalue {
+  template <typename ITy> bool match(ITy *V) const {
+    if (VPValue *Val = dyn_cast<VPValue>(V))
+      return Val->isDefinedOutsideLoopRegions();
+    return false;
+  }
+};
+
+/// Creates a matcher for VPValues that are defined outside of loop regions.
+inline loop_invariant_vpvalue m_LoopInvVPValue() { return {}; }
+
+/// Creates a matcher for a VPInstruction with opcode VPInstruction::AnyOf
+/// whose single operand is matched by \p Op0.
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+m_AnyOf(const Op0_t &Op0) {
+  return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
+/// Matcher adaptor that succeeds only if the matched value has exactly one
+/// use and additionally satisfies \p SubPattern.
+template <typename SubPattern_t> struct OneUse_match {
+  SubPattern_t SubPattern;
+
+  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+  // Declared const, like loop_invariant_vpvalue::match, so that the matcher
+  // can also be invoked through a const reference (e.g. when it is used as
+  // the outermost pattern) and not only on a by-value copy.
+  template <typename OpTy> bool match(OpTy *V) const {
+    // Check the cheap use-count first; only then descend into the sub-pattern.
+    return V->hasOneUse() && SubPattern.match(V);
+  }
+};
+
+/// Wraps \p SubPattern so that it only matches values with a single use.
+template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
+  return OneUse_match<T>(SubPattern);
+}
+
 } // namespace VPlanPatternMatch
 } // namespace llvm
 

diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -138,3 +138,110 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
   });
   return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
 }
+
+// Matches the recipes feeding the uncounted exit condition of the vector loop
+// region of \p Plan. On success the matched recipes are appended to \p Recipes
+// (compares first, then each load followed by its address recipe), the
+// address recipes are additionally appended to \p GEPs, and the condition fed
+// into AnyOf is returned. On any mismatch std::nullopt is returned and the
+// output vectors are left untouched.
+std::optional<VPValue *>
+vputils::getRecipesForUncountedExit(VPlan &Plan,
+                                    SmallVectorImpl<VPRecipeBase *> &Recipes,
+                                    SmallVectorImpl<VPRecipeBase *> &GEPs) {
+  using namespace llvm::VPlanPatternMatch;
+  // Given a vplan like the following (just including the recipes contributing
+  // to loop control exiting here, not the actual work), we're looking to match
+  // the recipes contributing to the uncounted exit condition comparison
+  // (here, vp<%4>) back to the canonical induction for the vector body so that
+  // we can copy them to a preheader and rotate the address in the loop to the
+  // next vector iteration.
+  //
+  // VPlan ' for UF>=1' {
+  // Live-in vp<%0> = VF
+  // Live-in ir<64> = original trip-count
+  //
+  // entry:
+  // Successor(s): preheader, vector.ph
+  //
+  // vector.ph:
+  // Successor(s): vector loop
+  //
+  // <x1> vector loop: {
+  //   vector.body:
+  //     EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
+  //     vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
+  //     CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
+  //     WIDEN ir<%ee.load> = load ir<%ee.addr>
+  //     WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
+  //     EMIT vp<%5> = any-of vp<%4>
+  //     EMIT vp<%6> = add vp<%2>, vp<%0>
+  //     EMIT vp<%7> = icmp eq vp<%6>, ir<64>
+  //     EMIT vp<%8> = or vp<%5>, vp<%7>
+  //     EMIT branch-on-cond vp<%8>
+  //   No successors
+  // }
+  // Successor(s): middle.block
+  //
+  // middle.block:
+  // Successor(s): preheader
+  //
+  // preheader:
+  // No successors
+  // }
+
+  // A plan without a vector loop region has no exit condition to match.
+  auto *Region = Plan.getVectorLoopRegion();
+  if (!Region)
+    return std::nullopt;
+
+  // Find the uncounted loop exit condition: the terminator must branch on
+  // (any-of(Cond) | <other exit condition>), with both the 'or' and the
+  // 'any-of' having a single use.
+  VPValue *UncountedCondition = nullptr;
+  if (!match(
+          Region->getExitingBasicBlock()->getTerminator(),
+          m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+              m_OneUse(m_AnyOf(m_VPValue(UncountedCondition))), m_VPValue())))))
+    return std::nullopt;
+
+  // Collect results locally so that the caller's vectors are only modified
+  // once the whole pattern is known to match.
+  SmallVector<VPRecipeBase *, 8> FoundRecipes;
+  SmallVector<VPRecipeBase *, 2> FoundGEPs;
+
+  SmallVector<VPValue *, 4> Worklist;
+  SmallVector<VPWidenLoadRecipe *, 1> Loads;
+  Worklist.push_back(UncountedCondition);
+  while (!Worklist.empty()) {
+    VPValue *V = Worklist.pop_back_val();
+
+    // Any value defined outside the loop does not need to be copied.
+    if (V->isDefinedOutsideLoopRegions())
+      continue;
+
+    // FIXME: Remove the single user restriction; it's here because we're
+    //        starting with the simplest set of loops we can, and multiple
+    //        users means needing to add PHI nodes in the transform.
+    if (V->getNumUsers() > 1)
+      return std::nullopt;
+
+    // Walk back through integer compares until we reach the loads (or
+    // loop-invariant values) they depend on; anything else is unsupported.
+    if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
+      if (Cmp->getOpcode() != Instruction::ICmp)
+        return std::nullopt;
+      Worklist.push_back(Cmp->getOperand(0));
+      Worklist.push_back(Cmp->getOperand(1));
+      FoundRecipes.push_back(Cmp);
+    } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
+      // Reject masked loads for the time being; they make the exit condition
+      // more complex.
+      if (Load->isMasked())
+        return std::nullopt;
+      Loads.push_back(Load);
+    } else {
+      return std::nullopt;
+    }
+  }
+
+  // Check the loads for exact patterns; for now we only support a contiguous
+  // load based directly on the canonical IV with a step of 1.
+  for (VPWidenLoadRecipe *Load : Loads) {
+    FoundRecipes.push_back(Load);
+    VPValue *GEP = Load->getAddr();
+
+    if (!match(GEP, m_GetElementPtr(
+                        m_LoopInvVPValue(),
+                        m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
+                                        m_SpecificInt(1),
+                                        m_Specific(&Plan.getVF())))))
+      return std::nullopt;
+
+    // The address recipe is reported both as a recipe to copy and as a GEP.
+    FoundRecipes.push_back(GEP->getDefiningRecipe());
+    FoundGEPs.push_back(GEP->getDefiningRecipe());
+  }
+
+  // Everything matched; publish the results in discovery order.
+  Recipes.append(FoundRecipes.begin(), FoundRecipes.end());
+  GEPs.append(FoundGEPs.begin(), FoundGEPs.end());
+  return UncountedCondition;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -97,6 +97,16 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
 /// Returns the header block of the first, top-level loop, or null if none
 /// exist.
 VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
+
+/// Returns the VPValue representing the uncounted exit comparison used by
+/// AnyOf if the recipes it depends on can be traced back to live-ins and
+/// the canonical IV and it is deemed safe to copy those recipes into the
+/// vector preheader. The recipes are stored in \p Recipes, and recipes
+/// forming an address for a load are also added to \p GEPs.
+///
+/// Returns std::nullopt if the vector loop does not match the supported
+/// pattern; in that case the contents of \p Recipes and \p GEPs should not be
+/// relied upon. Currently only single-use chains of integer compares over
+/// unmasked widened loads are accepted, where each load address is a
+/// getelementptr of a loop-invariant base indexed by scalar steps of the
+/// canonical IV with a step of 1.
+std::optional<VPValue *>
+getRecipesForUncountedExit(VPlan &Plan,
+                           SmallVectorImpl<VPRecipeBase *> &Recipes,
+                           SmallVectorImpl<VPRecipeBase *> &GEPs);
 } // namespace vputils
 
 //===----------------------------------------------------------------------===//

diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -148,6 +148,8 @@ class LLVM_ABI_FOR_TEST VPValue {
     return Current != user_end();
   }
 
+  /// Returns true if this VPValue has exactly one user.
+  bool hasOneUse() const { return getNumUsers() == 1; }
+
   void replaceAllUsesWith(VPValue *New);
 
   /// Go through the uses list for this VPValue and make each use point to \p

diff --git a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -14,5 +14,6 @@ add_llvm_unittest(VectorizeTests
   VPlanHCFGTest.cpp
   VPlanPatternMatchTest.cpp
   VPlanSlpTest.cpp
+  VPlanUncountedExitTest.cpp
   VPlanVerifierTest.cpp
   )
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
@@ -0,0 +1,99 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp -----===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/LoopVectorizationPlanner.h"
+#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanPatternMatch.h"
+#include "../lib/Transforms/Vectorize/VPlanUtils.h"
+#include "VPlanTestBase.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+namespace {
+/// Fixture for the uncounted-exit recipe matching tests; it adds nothing on
+/// top of VPlanTestBase.
+class VPUncountedExitTest : public VPlanTestBase {};
+
+// Hand-builds a VPlan whose vector loop exits on either an uncounted condition
+// (any-of over a compare of a loaded value) or the counted trip-count check,
+// then verifies that vputils::getRecipesForUncountedExit finds the compare,
+// the load and the address recipe feeding the uncounted condition.
+TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
+  // Create CFG skeleton.
+  VPlan &Plan = getPlan();
+  VPBasicBlock *ScalarPH = Plan.getEntry();
+  VPBasicBlock *Entry = Plan.createVPBasicBlock("entry");
+  Plan.setEntry(Entry);
+  VPBasicBlock *VectorPH = Plan.createVPBasicBlock("vector.ph");
+  VPBasicBlock *VecBody = Plan.createVPBasicBlock("vector.body");
+  // The region consists of the single block VecBody, which is passed as both
+  // of the block arguments.
+  VPRegionBlock *Region =
+      Plan.createVPRegionBlock(VecBody, VecBody, "vector loop");
+  VPBasicBlock *MiddleBlock = Plan.createVPBasicBlock("middle.block");
+  VPBlockUtils::connectBlocks(Entry, ScalarPH);
+  VPBlockUtils::connectBlocks(Entry, VectorPH);
+  VPBlockUtils::connectBlocks(VectorPH, Region);
+  VPBlockUtils::connectBlocks(Region, MiddleBlock);
+  VPBlockUtils::connectBlocks(MiddleBlock, ScalarPH);
+
+  // Live-Ins
+  IntegerType *I64Ty = IntegerType::get(C, 64);
+  IntegerType *I32Ty = IntegerType::get(C, 32);
+  PointerType *PTy = PointerType::get(C, 0);
+  VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
+  VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
+  VPValue *VF = &Plan.getVF();
+  Plan.setTripCount(Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 64)));
+
+  // Populate vector.body with the recipes for exiting.
+  auto *IV = new VPCanonicalIVPHIRecipe(Zero, {});
+  VecBody->appendRecipe(IV);
+  VPBuilder Builder(VecBody, VecBody->getFirstNonPhi());
+  // Scalar steps of the canonical IV with a step of 1 and the plan's VF; this
+  // is the index form the matcher requires for the load address.
+  auto *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, IV, Inc,
+                                            VF, DebugLoc());
+
+  // Uncounted Exit; GEP -> Load -> Cmp
+  // The IR instructions below are free-standing dummies that only serve as
+  // underlying values for the recipes; they are deleted at the end of the test.
+  auto *DummyGEP = GetElementPtrInst::Create(I32Ty, Zero->getUnderlyingValue(),
+                                             {}, Twine("ee.addr"));
+  // NOTE(review): the 'true' argument presumably marks the replicate recipe
+  // as uniform/single-scalar, and the trailing nullptr as unmasked -- confirm
+  // against the VPReplicateRecipe constructor.
+  auto *GEP = new VPReplicateRecipe(DummyGEP, {Zero, Steps}, true, nullptr);
+  Builder.insert(GEP);
+  auto *DummyLoad =
+      new LoadInst(I32Ty, PoisonValue::get(PTy), "ee.load", false, Align(1));
+  // NOTE(review): the nullptr is presumably the (absent) mask and 'true,
+  // false' presumably mean consecutive and not reversed -- confirm against the
+  // VPWidenLoadRecipe constructor.
+  VPValue *Load =
+      new VPWidenLoadRecipe(*DummyLoad, GEP, nullptr, true, false, {}, {});
+  Builder.insert(Load->getDefiningRecipe());
+  // Should really splat the zero, but we're not checking types here.
+  VPValue *Cmp = new VPWidenRecipe(Instruction::ICmp, {Load, Zero},
+                                   VPIRFlags(CmpInst::ICMP_EQ), {}, {});
+  Builder.insert(Cmp->getDefiningRecipe());
+  VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, Cmp);
+
+  // Counted Exit; Inc IV -> Cmp
+  VPValue *NextIV = Builder.createNaryOp(Instruction::Add, {IV, VF});
+  VPValue *Counted =
+      Builder.createICmp(CmpInst::ICMP_EQ, NextIV, Plan.getTripCount());
+
+  // Combine, and branch.
+  VPValue *Combined = Builder.createNaryOp(Instruction::Or, {AnyOf, Counted});
+  Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
+
+  SmallVector<VPRecipeBase *, 8> Recipes;
+  SmallVector<VPRecipeBase *, 2> GEPs;
+
+  // The matcher should return the compare feeding any-of, report the single
+  // address recipe in GEPs, and collect three recipes in total (the compare,
+  // the load and the address recipe).
+  std::optional<VPValue *> UncountedCondition =
+      vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);
+  ASSERT_TRUE(UncountedCondition.has_value());
+  ASSERT_EQ(*UncountedCondition, Cmp);
+  ASSERT_EQ(GEPs.size(), 1ull);
+  ASSERT_EQ(GEPs[0], GEP);
+  ASSERT_EQ(Recipes.size(), 3ull);
+
+  // Free the dummy IR instructions created with new / Create above. Note that
+  // a failing ASSERT_* returns early and skips this cleanup.
+  delete DummyLoad;
+  delete DummyGEP;
+}
+
+} // namespace
+} // namespace llvm