Skip to content

[LV] Transform to handle exits in the scalar loop #148626

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));

// Hidden, off-by-default command-line flag. Its value is forwarded to
// setEarlyExitContinuesInScalarLoop() on freshly built plans, and it gates
// running VPlanTransforms::handleExitsInScalarLoop while building VPlans.
static cl::opt<bool> HandleEarlyExitsInScalarTail(
"handle-early-exits-in-scalar-tail", cl::init(false), cl::Hidden,
cl::desc("Use the scalar tail to deal with early exit logic"));

// Likelihood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
Expand Down Expand Up @@ -491,8 +495,8 @@ class InnerLoopVectorizer {
AC(AC), ORE(ORE), VF(VecWidth),
MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI),
RTChecks(RTChecks), Plan(Plan),
VectorPHVPB(Plan.getVectorLoopRegion()->getSinglePredecessor()) {}
RTChecks(RTChecks), Plan(Plan), VectorPHVPB(Plan.getVectorPreheader()) {
}

virtual ~InnerLoopVectorizer() = default;

Expand Down Expand Up @@ -8322,6 +8326,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,

auto MaxVFTimes2 = MaxVF * 2;
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
VPlan0->setEarlyExitContinuesInScalarLoop(HandleEarlyExitsInScalarTail);
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
if (auto Plan = tryToBuildVPlanWithVPRecipes(
Expand All @@ -8338,6 +8343,14 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
!VPlanTransforms::runPass(VPlanTransforms::tryAddExplicitVectorLength,
*Plan, CM.getMaxSafeElements()))
break;
// See if we can convert an early exit vplan to bail out to a scalar
// loop if state-changing operations (like stores) are present and
// an exit will be taken in the next vector iteration.
// If not, discard the plan.
if (HandleEarlyExitsInScalarTail && !HasScalarVF &&
!VPlanTransforms::runPass(VPlanTransforms::handleExitsInScalarLoop,
*Plan))
break;
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
Expand Down Expand Up @@ -8391,8 +8404,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
auto *ScalarPH = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBuilder VectorPHBuilder(
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
VPBuilder VectorPHBuilder(cast<VPBasicBlock>(Plan.getVectorPreheader()));
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPBuilder ScalarPHBuilder(ScalarPH);
for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader()->phis()) {
Expand Down Expand Up @@ -8839,8 +8851,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
auto *R = cast<VPRecipeBase>(&U);
return R->getParent()->getParent() ||
R->getParent() ==
Plan->getVectorLoopRegion()->getSinglePredecessor();
R->getParent() == Plan->getVectorPreheader();
};
for (auto [_, Stride] : Legal->getLAI()->getSymbolicStrides()) {
auto *StrideV = cast<SCEVUnknown>(Stride)->getValue();
Expand Down Expand Up @@ -8906,6 +8917,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");

auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
Plan->setEarlyExitContinuesInScalarLoop(HandleEarlyExitsInScalarTail);
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,8 @@ VPlan *VPlan::duplicate() {
NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
}

NewPlan->setEarlyExitContinuesInScalarLoop(EarlyExitContinuesInScalarLoop);

return NewPlan;
}

Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3883,6 +3883,14 @@ class VPlan {
/// VPlan is destroyed.
SmallVector<VPBlockBase *> CreatedBlocks;

/// The entry block in a VPlan, which may be a check block that needs to
/// be wired up in the right place with existing check blocks.
/// When this holds a value, getVectorPreheader() returns it instead of the
/// single predecessor of the vector loop region. It is assigned through
/// setEarlyExitPreheader() and is empty by default.
std::optional<VPBasicBlock *> EarlyExitPreheader;

/// Indicates that an early exit loop will exit before the condition is
/// reached, and that the scalar loop must perform the last few iterations.
/// Defaults to false; read via shouldEarlyExitContinueInScalarLoop() and
/// written via setEarlyExitContinuesInScalarLoop().
bool EarlyExitContinuesInScalarLoop = false;

/// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
/// wrapping the original header of the scalar loop.
VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
Expand Down Expand Up @@ -3929,12 +3937,17 @@ class VPlan {
/// Returns the block acting as preheader of the vector loop. If an early-exit
/// preheader override has been installed, that block is returned. Otherwise
/// the result is the single predecessor of the vector loop region, or null
/// when the plan has no vector loop region.
VPBasicBlock *getVectorPreheader() {
  // An explicitly installed override wins; the region is not consulted.
  if (EarlyExitPreheader)
    return *EarlyExitPreheader;

  if (VPRegionBlock *VectorRegion = getVectorLoopRegion())
    return cast<VPBasicBlock>(VectorRegion->getSinglePredecessor());

  return nullptr;
}

/// Installs \p BB as the block reported by getVectorPreheader(), taking
/// precedence over the predecessor of the vector loop region.
void setEarlyExitPreheader(VPBasicBlock *BB) {
  EarlyExitPreheader = BB;
}

/// Returns the VPRegionBlock of the vector loop.
LLVM_ABI_FOR_TEST VPRegionBlock *getVectorLoopRegion();
LLVM_ABI_FOR_TEST const VPRegionBlock *getVectorLoopRegion() const;
Expand Down Expand Up @@ -4187,6 +4200,17 @@ class VPlan {
(ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
}

/// Queries whether the vector iteration that contains an exit is to be
/// handled by the scalar loop rather than by masking.
bool shouldEarlyExitContinueInScalarLoop() const {
  const bool HandledInScalarLoop = EarlyExitContinuesInScalarLoop;
  return HandledInScalarLoop;
}

/// If set to true, early exits should be handled in the scalar loop.
void setEarlyExitContinuesInScalarLoop(bool Continues) {
EarlyExitContinuesInScalarLoop = Continues;
}

/// Returns true if the scalar tail may execute after the vector loop. Note
/// that this relies on unneeded branches to the scalar tail loop being
/// removed.
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,9 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
if (Plan.shouldEarlyExitContinueInScalarLoop())
for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
} else {
for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
Expand Down
31 changes: 31 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,37 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
}

/// Matcher accepting any VPValue that is defined outside of loop regions.
struct loop_invariant_vpvalue {
  template <typename ITy> bool match(ITy *V) const {
    // Anything that is not a VPValue can never match.
    if (VPValue *Val = dyn_cast<VPValue>(V))
      return Val->isDefinedOutsideLoopRegions();
    return false;
  }
};

/// Creates a matcher for VPValues defined outside of loop regions.
inline loop_invariant_vpvalue m_LoopInvVPValue() { return {}; }

/// Builds a unary VPInstruction matcher for the VPInstruction::AnyOf opcode,
/// using \p Op0 as the sub-pattern for its operand.
template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
m_AnyOf(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::AnyOf>(Op0);
}

/// Wraps another matcher and additionally requires the matched value to
/// report exactly one use via hasOneUse().
template <typename SubPattern_t> struct OneUse_match {
  SubPattern_t SubPattern;

  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}

  template <typename OpTy> bool match(OpTy *V) {
    // Reject multi-use values first; the wrapped pattern is only consulted
    // once the use-count requirement is satisfied.
    if (!V->hasOneUse())
      return false;
    return SubPattern.match(V);
  }
};

/// Wraps \p SubPattern so that it only matches values with a single use.
template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
  return OneUse_match<T>(SubPattern);
}

} // namespace VPlanPatternMatch
} // namespace llvm

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ bool VPRecipeBase::isPhi() const {
return (getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC) ||
(isa<VPInstruction>(this) &&
cast<VPInstruction>(this)->getOpcode() == Instruction::PHI) ||
isa<VPIRPhi>(this);
isa<VPPhi>(this) || isa<VPIRPhi>(this);
}

bool VPRecipeBase::isScalarCast() const {
Expand Down
Loading
Loading