Skip to content

[Coroutines] fix coroutines + std::unique_ptr with async exceptions validation errors #149691

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "CoroInternal.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/DIBuilder.h"
Expand All @@ -35,6 +36,7 @@
#include "llvm/Transforms/Coroutines/SpillUtils.h"
#include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
Expand Down Expand Up @@ -973,6 +975,178 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
return FrameTy;
}

// This function assumes that it is being called on basic block in reversed
// post-order, meaning predecessors are visited before successors failing to do
// so will cause VMap to be non-valid and will cause instructions to fail
// mapping to their corresponding clones
static void finalizeBasicBlockCloneAndTrackSuccessors(
BasicBlock *InitialBlock, BasicBlock *ClonedBlock, ValueToValueMapTy &VMap,
SmallSet<BasicBlock *, 20> &SuccessorBlocksSet) {
// This code will examine the basic block, fix issues caused by clones
// for example - tailor cleanupret to the corresponding cleanuppad
// it will use VMap to do so
// in addition, it will add the node successors to SuccessorBlocksSet

// Remap the instructions, VMap here aggregates instructions across
// multiple BasicBlocks, and we assume that traversal is reversed post-order,
// therefore successor blocks (for example instructions having funclet
// tags) will be mapped correctly to the new cloned cleanuppad
for (Instruction &ClonedBlockInstruction : *ClonedBlock) {
RemapInstruction(&ClonedBlockInstruction, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}

const auto &InitialBlockTerminator = InitialBlock->getTerminator();

// If it's cleanupret, find the correspondant cleanuppad (use the VMap to
// find it).
if (auto *InitialBlockTerminatorCleanupReturn =
dyn_cast<CleanupReturnInst>(InitialBlockTerminator)) {
// if none found do nothing
if (VMap.find(InitialBlockTerminatorCleanupReturn->getCleanupPad()) ==
VMap.end()) {
return;
}

auto *ClonedBlockTerminatorCleanupReturn =
cast<CleanupReturnInst>(ClonedBlock->getTerminator());

// Assuming reversed post-order traversal
llvm::Value *ClonedBlockCleanupPadValue =
VMap[InitialBlockTerminatorCleanupReturn->getCleanupPad()];
auto *ClonedBlockCleanupPad =
cast<CleanupPadInst>(ClonedBlockCleanupPadValue);
ClonedBlockTerminatorCleanupReturn->setCleanupPad(ClonedBlockCleanupPad);

// If it's a branch/invoke, keep track of its successors, we want to
// calculate dominance between CoroBegin and them also
} else if (auto *InitialBlockTerminatorBranch =
dyn_cast<BranchInst>(InitialBlockTerminator)) {
for (unsigned int successorIdx = 0;
successorIdx < InitialBlockTerminatorBranch->getNumSuccessors();
++successorIdx) {
SuccessorBlocksSet.insert(
InitialBlockTerminatorBranch->getSuccessor(successorIdx));
}
} else if (auto *InitialBlockTerminatorInvoke =
dyn_cast<InvokeInst>(InitialBlockTerminator)) {
SuccessorBlocksSet.insert(InitialBlockTerminatorInvoke->getUnwindDest());
} else if (isa<ReturnInst>(InitialBlockTerminator)) {
// No action needed
} else {
InitialBlockTerminator->print(dbgs());
report_fatal_error("Terminator is not implemented in "
"finalizeBasicBlockCloneAndTrackSuccessors");
}
}

// Dominance issue fixer for each predecessor satisfying predicate function
void splitIfBasicBlockPredecessors(
BasicBlock *InitialBlock, BasicBlock *ReplacementBlock,
std::function<bool(BasicBlock *)> Predicate) {

SmallVector<BasicBlock *> InitialBlockPredecessors;

auto Predecessors = predecessors(InitialBlock);
std::copy_if(Predecessors.begin(), Predecessors.end(),
std::back_inserter(InitialBlockPredecessors), Predicate);

// Fixups on the predecessors terminator - point them to ReplacementBlock.
for (auto *InitialBlockPredecessor : InitialBlockPredecessors) {
auto *InitialBlockPredecessorTerminator =
InitialBlockPredecessor->getTerminator();
if (auto *InitialBlockPredecessorTerminatorInvoke =
dyn_cast<InvokeInst>(InitialBlockPredecessorTerminator)) {
if (InitialBlock ==
InitialBlockPredecessorTerminatorInvoke->getUnwindDest()) {
InitialBlockPredecessorTerminatorInvoke->setUnwindDest(
ReplacementBlock);
} else {
InitialBlockPredecessorTerminatorInvoke->setNormalDest(
ReplacementBlock);
}
} else if (auto *InitialBlockPredecessorTerminatorBranch =
dyn_cast<BranchInst>(InitialBlockPredecessorTerminator)) {
for (unsigned int successorIdx = 0;
successorIdx <
InitialBlockPredecessorTerminatorBranch->getNumSuccessors();
++successorIdx) {
if (InitialBlock ==
InitialBlockPredecessorTerminatorBranch->getSuccessor(
successorIdx)) {
InitialBlockPredecessorTerminatorBranch->setSuccessor(
successorIdx, ReplacementBlock);
}
}
} else if (auto *InitialBlockPredecessorTerminatorCleanupReturn =
dyn_cast<CleanupReturnInst>(
InitialBlockPredecessorTerminator)) {
InitialBlockPredecessorTerminatorCleanupReturn->setUnwindDest(
ReplacementBlock);
} else {
InitialBlockPredecessorTerminator->print(dbgs());
report_fatal_error(
"Terminator is not implemented in splitIfBasicBlockPredecessors");
}
}
}

// Fixer for the "Instruction does not dominate all uses!" bug
// The fix consists of mapping problematic paths (where CoroBegin does not
// dominate cleanup nodes) and duplicates them to 2 flows - the one that
// insertSpills intended to create (using the spill) and another one, preserving
// the logics of pre-splitting, which would be triggered if unwinding happened
// before CoroBegin
static void
splitBasicBlocksNotDominatedByCoroBegin(const FrameDataInfo &FrameData,
const coro::Shape &Shape, Function *F,
DominatorTree &DT) {
ValueToValueMapTy VMap;
SmallSet<BasicBlock *, 20> SpillUserBlocksSet;

// Prepare the node set, logics will be run only on those nodes
for (const auto &E : FrameData.Spills) {
for (auto *U : E.second) {
auto CurrentBlock = U->getParent();
if (!DT.dominates(Shape.CoroBegin, CurrentBlock)) {
SpillUserBlocksSet.insert(CurrentBlock);
}
}
}

// Run is in reversed post order, to enforce visiting predecessors before
// successors
for (BasicBlock *CurrentBlock : ReversePostOrderTraversal<Function *>(F)) {
if (!SpillUserBlocksSet.contains(CurrentBlock)) {
continue;
}
SpillUserBlocksSet.erase(CurrentBlock);

// Preserve the current node. the duplicate will become the unspilled
// alternative
auto UnspilledAlternativeBlock =
CloneBasicBlock(CurrentBlock, VMap, ".unspilled_alternative", F);

// Remap node instructions, keep track of successors to visit them in next
// iterations
finalizeBasicBlockCloneAndTrackSuccessors(
CurrentBlock, UnspilledAlternativeBlock, VMap, SpillUserBlocksSet);

// Split only if predecessor breaks dominance against CoroBegin
splitIfBasicBlockPredecessors(CurrentBlock, UnspilledAlternativeBlock,
[&DT, &Shape](BasicBlock *PredecessorNode) {
return !DT.dominates(Shape.CoroBegin,
PredecessorNode);
});

// We changed dominance tree, so recalculate
DT.recalculate(*F);
}

assert(SpillUserBlocksSet.empty() &&
"Graph is corrupted by SpillUserBlocksSet");
}

// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
Expand Down Expand Up @@ -1052,6 +1226,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
return GEP;
};

splitBasicBlocksNotDominatedByCoroBegin(FrameData, Shape, F, DT);

for (auto const &E : FrameData.Spills) {
Value *Def = E.first;
auto SpillAlignment = Align(FrameData.getAlign(Def));
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Transforms/Coroutines/CoroSplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,16 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
// If coro.end has an associated bundle, add cleanupret instruction.
if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);

// If the terminator is an invoke,
// set the cleanupret unwind destination the same as the other edges, to
// avoid validation errors
BasicBlock *UBB = nullptr;
if (auto II = dyn_cast<InvokeInst>(FromPad->getParent()->getTerminator())) {
UBB = II->getUnwindDest();
}

auto *CleanupRet = Builder.CreateCleanupRet(FromPad, UBB);
End->getParent()->splitBasicBlock(End);
CleanupRet->getParent()->getTerminator()->eraseFromParent();
}
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
; crashed before fix because of the validation mismatch of Instruction does not dominate all uses!
; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
; RUN: opt < %s -passes='default<Os>,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"

; Function Attrs: presplitcoroutine
define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null {
invoke void @llvm.seh.scope.begin()
to label %2 unwind label %14

2: ; preds = %1
%3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%4 = load volatile ptr, ptr null, align 8
%5 = call ptr @llvm.coro.begin(token %3, ptr %4)
%6 = call token @llvm.coro.save(ptr null)
%7 = call i8 @llvm.coro.suspend(token none, i1 false)
invoke void @llvm.seh.try.begin()
to label %common.ret unwind label %8

common.ret: ; preds = %12, %10, %2
ret i8 0

8: ; preds = %2
%9 = catchswitch within none [label %10] unwind label %12

10: ; preds = %8
%11 = catchpad within %9 [ptr null, i32 0, ptr null]
br label %common.ret

12: ; preds = %8
%13 = cleanuppad within none []
invoke void @llvm.seh.scope.end()
to label %common.ret unwind label %14

14: ; preds = %12, %1
%15 = cleanuppad within none []
store i32 0, ptr %0, align 4
cleanupret from %15 unwind to caller
}

attributes #0 = { presplitcoroutine }

!llvm.module.flags = !{!0}

!0 = !{i32 2, !"eh-asynch", i32 1}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
; crashed before fix because of the validation mismatch of Instruction does not dominate all uses!
; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
; RUN: opt < %s -passes='default<Os>,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"

; Function Attrs: presplitcoroutine
define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null {
invoke void @llvm.seh.scope.begin()
to label %2 unwind label %14

2: ; preds = %1
%3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%4 = load volatile ptr, ptr null, align 8
%5 = call ptr @llvm.coro.begin(token %3, ptr %4)
%6 = call token @llvm.coro.save(ptr null)
%7 = call i8 @llvm.coro.suspend(token none, i1 false)
invoke void @llvm.seh.try.begin()
to label %common.ret unwind label %8

common.ret: ; preds = %12, %10, %2
ret i8 0

8: ; preds = %2
%9 = catchswitch within none [label %10] unwind label %12

10: ; preds = %8
%11 = catchpad within %9 [ptr null, i32 0, ptr null]
br label %common.ret

12: ; preds = %8
%13 = cleanuppad within none []
invoke void @llvm.seh.scope.end()
to label %common.ret unwind label %14

14: ; preds = %12, %1
%15 = cleanuppad within none []
store i32 0, ptr %0, align 4
br label %common.ret
}

attributes #0 = { presplitcoroutine }

!llvm.module.flags = !{!0}

!0 = !{i32 2, !"eh-asynch", i32 1}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
; crashed after first phase of fix because the terminator cleanupret was not implemented on predecessor fixer at the time
; RUN: opt < %s -passes='coro-split' -S
; RUN: opt < %s -passes='default<Os>,coro-split' -S

; Function Attrs: presplitcoroutine
define i8 @"?resuming_on_new_thread@@YA?AUtask@@V?$unique_ptr@HU?$default_delete@H@std@@@std@@0@Z"(ptr %0) #0 personality ptr null {
invoke void @llvm.seh.scope.begin()
to label %2 unwind label %15

2: ; preds = %1
%3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%4 = call ptr @llvm.coro.begin(token %3, ptr null)
%5 = call token @llvm.coro.save(ptr null)
%6 = call i8 @llvm.coro.suspend(token none, i1 false)
invoke void @llvm.seh.try.begin()
to label %7 unwind label %8

7: ; preds = %2
ret i8 0

8: ; preds = %2
%9 = catchswitch within none [label %10] unwind label %12

10: ; preds = %8
%11 = catchpad within %9 [ptr null, i32 0, ptr null]
ret i8 0

12: ; preds = %8
%13 = cleanuppad within none []
invoke void @llvm.seh.scope.end()
to label %14 unwind label %15

14: ; preds = %12
ret i8 0

15: ; preds = %12, %1
%16 = cleanuppad within none []
cleanupret from %16 unwind label %17

17: ; preds = %15
%18 = cleanuppad within none []
store i32 0, ptr %0, align 4
cleanupret from %18 unwind to caller
}

attributes #0 = { presplitcoroutine }

!llvm.module.flags = !{!0}

!0 = !{i32 2, !"eh-asynch", i32 1}
Loading