diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 332050860e05b..133f3b400b06e 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -17,6 +17,7 @@ #include "CoroInternal.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/StackLifetime.h" #include "llvm/IR/DIBuilder.h" @@ -35,6 +36,7 @@ #include "llvm/Transforms/Coroutines/SpillUtils.h" #include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include @@ -973,6 +975,178 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, return FrameTy; } +// This function assumes that it is being called on basic block in reversed +// post-order, meaning predecessors are visited before successors failing to do +// so will cause VMap to be non-valid and will cause instructions to fail +// mapping to their corresponding clones +static void finalizeBasicBlockCloneAndTrackSuccessors( + BasicBlock *InitialBlock, BasicBlock *ClonedBlock, ValueToValueMapTy &VMap, + SmallSet &SuccessorBlocksSet) { + // This code will examine the basic block, fix issues caused by clones + // for example - tailor cleanupret to the corresponding cleanuppad + // it will use VMap to do so + // in addition, it will add the node successors to SuccessorBlocksSet + + // Remap the instructions, VMap here aggregates instructions across + // multiple BasicBlocks, and we assume that traversal is reversed post-order, + // therefore successor blocks (for example instructions having funclet + // tags) will be mapped correctly to the new cloned cleanuppad + for (Instruction &ClonedBlockInstruction : *ClonedBlock) { + RemapInstruction(&ClonedBlockInstruction, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + } + + const auto &InitialBlockTerminator = InitialBlock->getTerminator(); + + // If it's cleanupret, find the correspondant cleanuppad (use the VMap to + // find it). + if (auto *InitialBlockTerminatorCleanupReturn = + dyn_cast(InitialBlockTerminator)) { + // if none found do nothing + if (VMap.find(InitialBlockTerminatorCleanupReturn->getCleanupPad()) == + VMap.end()) { + return; + } + + auto *ClonedBlockTerminatorCleanupReturn = + cast(ClonedBlock->getTerminator()); + + // Assuming reversed post-order traversal + llvm::Value *ClonedBlockCleanupPadValue = + VMap[InitialBlockTerminatorCleanupReturn->getCleanupPad()]; + auto *ClonedBlockCleanupPad = + cast(ClonedBlockCleanupPadValue); + ClonedBlockTerminatorCleanupReturn->setCleanupPad(ClonedBlockCleanupPad); + + // If it's a branch/invoke, keep track of its successors, we want to + // calculate dominance between CoroBegin and them also + } else if (auto *InitialBlockTerminatorBranch = + dyn_cast(InitialBlockTerminator)) { + for (unsigned int successorIdx = 0; + successorIdx < InitialBlockTerminatorBranch->getNumSuccessors(); + ++successorIdx) { + SuccessorBlocksSet.insert( + InitialBlockTerminatorBranch->getSuccessor(successorIdx)); + } + } else if (auto *InitialBlockTerminatorInvoke = + dyn_cast(InitialBlockTerminator)) { + SuccessorBlocksSet.insert(InitialBlockTerminatorInvoke->getUnwindDest()); + } else if (isa(InitialBlockTerminator)) { + // No action needed + } else { + InitialBlockTerminator->print(dbgs()); + report_fatal_error("Terminator is not implemented in " + "finalizeBasicBlockCloneAndTrackSuccessors"); + } +} + +// Dominance issue fixer for each predecessor satisfying predicate function +void splitIfBasicBlockPredecessors( + BasicBlock *InitialBlock, BasicBlock *ReplacementBlock, + std::function Predicate) { + + SmallVector InitialBlockPredecessors; + + auto Predecessors = predecessors(InitialBlock); + std::copy_if(Predecessors.begin(), Predecessors.end(), + std::back_inserter(InitialBlockPredecessors), Predicate); + + // Fixups on the predecessors terminator - point them to ReplacementBlock. + for (auto *InitialBlockPredecessor : InitialBlockPredecessors) { + auto *InitialBlockPredecessorTerminator = + InitialBlockPredecessor->getTerminator(); + if (auto *InitialBlockPredecessorTerminatorInvoke = + dyn_cast(InitialBlockPredecessorTerminator)) { + if (InitialBlock == + InitialBlockPredecessorTerminatorInvoke->getUnwindDest()) { + InitialBlockPredecessorTerminatorInvoke->setUnwindDest( + ReplacementBlock); + } else { + InitialBlockPredecessorTerminatorInvoke->setNormalDest( + ReplacementBlock); + } + } else if (auto *InitialBlockPredecessorTerminatorBranch = + dyn_cast(InitialBlockPredecessorTerminator)) { + for (unsigned int successorIdx = 0; + successorIdx < + InitialBlockPredecessorTerminatorBranch->getNumSuccessors(); + ++successorIdx) { + if (InitialBlock == + InitialBlockPredecessorTerminatorBranch->getSuccessor( + successorIdx)) { + InitialBlockPredecessorTerminatorBranch->setSuccessor( + successorIdx, ReplacementBlock); + } + } + } else if (auto *InitialBlockPredecessorTerminatorCleanupReturn = + dyn_cast( + InitialBlockPredecessorTerminator)) { + InitialBlockPredecessorTerminatorCleanupReturn->setUnwindDest( + ReplacementBlock); + } else { + InitialBlockPredecessorTerminator->print(dbgs()); + report_fatal_error( + "Terminator is not implemented in splitIfBasicBlockPredecessors"); + } + } +} + +// Fixer for the "Instruction does not dominate all uses!" bug +// The fix consists of mapping problematic paths (where CoroBegin does not +// dominate cleanup nodes) and duplicates them to 2 flows - the one that +// insertSpills intended to create (using the spill) and another one, preserving +// the logics of pre-splitting, which would be triggered if unwinding happened +// before CoroBegin +static void +splitBasicBlocksNotDominatedByCoroBegin(const FrameDataInfo &FrameData, + const coro::Shape &Shape, Function *F, + DominatorTree &DT) { + ValueToValueMapTy VMap; + SmallSet SpillUserBlocksSet; + + // Prepare the node set, logics will be run only on those nodes + for (const auto &E : FrameData.Spills) { + for (auto *U : E.second) { + auto CurrentBlock = U->getParent(); + if (!DT.dominates(Shape.CoroBegin, CurrentBlock)) { + SpillUserBlocksSet.insert(CurrentBlock); + } + } + } + + // Run is in reversed post order, to enforce visiting predecessors before + // successors + for (BasicBlock *CurrentBlock : ReversePostOrderTraversal(F)) { + if (!SpillUserBlocksSet.contains(CurrentBlock)) { + continue; + } + SpillUserBlocksSet.erase(CurrentBlock); + + // Preserve the current node. the duplicate will become the unspilled + // alternative + auto UnspilledAlternativeBlock = + CloneBasicBlock(CurrentBlock, VMap, ".unspilled_alternative", F); + + // Remap node instructions, keep track of successors to visit them in next + // iterations + finalizeBasicBlockCloneAndTrackSuccessors( + CurrentBlock, UnspilledAlternativeBlock, VMap, SpillUserBlocksSet); + + // Split only if predecessor breaks dominance against CoroBegin + splitIfBasicBlockPredecessors(CurrentBlock, UnspilledAlternativeBlock, + [&DT, &Shape](BasicBlock *PredecessorNode) { + return !DT.dominates(Shape.CoroBegin, + PredecessorNode); + }); + + // We changed dominance tree, so recalculate + DT.recalculate(*F); + } + + assert(SpillUserBlocksSet.empty() && + "Graph is corrupted by SpillUserBlocksSet"); +} + // Replace all alloca and SSA values that are accessed across suspend points // with GetElementPointer from coroutine frame + loads and stores. Create an // AllocaSpillBB that will become the new entry block for the resume parts of @@ -1052,6 +1226,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { return GEP; }; + splitBasicBlocksNotDominatedByCoroBegin(FrameData, Shape, F, DT); + for (auto const &E : FrameData.Spills) { Value *Def = E.first; auto SpillAlignment = Align(FrameData.getAlign(Def)); diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 64b33e46404f0..047ec57ae7c02 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -376,7 +376,16 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, // If coro.end has an associated bundle, add cleanupret instruction. if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { auto *FromPad = cast(Bundle->Inputs[0]); - auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); + + // If the terminator is an invoke, + // set the cleanupret unwind destination the same as the other edges, to + // avoid validation errors + BasicBlock *UBB = nullptr; + if (auto II = dyn_cast(FromPad->getParent()->getTerminator())) { + UBB = II->getUnwindDest(); + } + + auto *CleanupRet = Builder.CreateCleanupRet(FromPad, UBB); End->getParent()->splitBasicBlock(End); CleanupRet->getParent()->getTerminator()->eraseFromParent(); } diff --git a/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate.ll b/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate.ll new file mode 100644 index 0000000000000..64de2d2b10be0 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate.ll @@ -0,0 +1,45 @@ +; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions +; crashed before fix because of the validation mismatch of Instruction does not dominate all uses! +; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!" +; RUN: opt < %s -passes='default,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!" + +; Function Attrs: presplitcoroutine +define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null { + invoke void @llvm.seh.scope.begin() + to label %2 unwind label %14 + +2: ; preds = %1 + %3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %4 = load volatile ptr, ptr null, align 8 + %5 = call ptr @llvm.coro.begin(token %3, ptr %4) + %6 = call token @llvm.coro.save(ptr null) + %7 = call i8 @llvm.coro.suspend(token none, i1 false) + invoke void @llvm.seh.try.begin() + to label %common.ret unwind label %8 + +common.ret: ; preds = %12, %10, %2 + ret i8 0 + +8: ; preds = %2 + %9 = catchswitch within none [label %10] unwind label %12 + +10: ; preds = %8 + %11 = catchpad within %9 [ptr null, i32 0, ptr null] + br label %common.ret + +12: ; preds = %8 + %13 = cleanuppad within none [] + invoke void @llvm.seh.scope.end() + to label %common.ret unwind label %14 + +14: ; preds = %12, %1 + %15 = cleanuppad within none [] + store i32 0, ptr %0, align 4 + cleanupret from %15 unwind to caller +} + +attributes #0 = { presplitcoroutine } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"eh-asynch", i32 1} diff --git a/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate_2.ll b/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate_2.ll new file mode 100644 index 0000000000000..d327f8853db5a --- /dev/null +++ b/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate_2.ll @@ -0,0 +1,45 @@ +; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions +; crashed before fix because of the validation mismatch of Instruction does not dominate all uses! +; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!" +; RUN: opt < %s -passes='default,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!" + +; Function Attrs: presplitcoroutine +define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null { + invoke void @llvm.seh.scope.begin() + to label %2 unwind label %14 + +2: ; preds = %1 + %3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %4 = load volatile ptr, ptr null, align 8 + %5 = call ptr @llvm.coro.begin(token %3, ptr %4) + %6 = call token @llvm.coro.save(ptr null) + %7 = call i8 @llvm.coro.suspend(token none, i1 false) + invoke void @llvm.seh.try.begin() + to label %common.ret unwind label %8 + +common.ret: ; preds = %12, %10, %2 + ret i8 0 + +8: ; preds = %2 + %9 = catchswitch within none [label %10] unwind label %12 + +10: ; preds = %8 + %11 = catchpad within %9 [ptr null, i32 0, ptr null] + br label %common.ret + +12: ; preds = %8 + %13 = cleanuppad within none [] + invoke void @llvm.seh.scope.end() + to label %common.ret unwind label %14 + +14: ; preds = %12, %1 + %15 = cleanuppad within none [] + store i32 0, ptr %0, align 4 + br label %common.ret +} + +attributes #0 = { presplitcoroutine } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"eh-asynch", i32 1} diff --git a/llvm/test/Transforms/Coroutines/pr148035_not_implemented_terminator_for_pred.ll b/llvm/test/Transforms/Coroutines/pr148035_not_implemented_terminator_for_pred.ll new file mode 100644 index 0000000000000..7b16b9c248a27 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/pr148035_not_implemented_terminator_for_pred.ll @@ -0,0 +1,51 @@ +; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions +; crashed after first phase of fix because the terminator cleanupret was not implemented on predecessor fixer at the time +; RUN: opt < %s -passes='coro-split' -S +; RUN: opt < %s -passes='default,coro-split' -S + +; Function Attrs: presplitcoroutine +define i8 @"?resuming_on_new_thread@@YA?AUtask@@V?$unique_ptr@HU?$default_delete@H@std@@@std@@0@Z"(ptr %0) #0 personality ptr null { + invoke void @llvm.seh.scope.begin() + to label %2 unwind label %15 + +2: ; preds = %1 + %3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %4 = call ptr @llvm.coro.begin(token %3, ptr null) + %5 = call token @llvm.coro.save(ptr null) + %6 = call i8 @llvm.coro.suspend(token none, i1 false) + invoke void @llvm.seh.try.begin() + to label %7 unwind label %8 + +7: ; preds = %2 + ret i8 0 + +8: ; preds = %2 + %9 = catchswitch within none [label %10] unwind label %12 + +10: ; preds = %8 + %11 = catchpad within %9 [ptr null, i32 0, ptr null] + ret i8 0 + +12: ; preds = %8 + %13 = cleanuppad within none [] + invoke void @llvm.seh.scope.end() + to label %14 unwind label %15 + +14: ; preds = %12 + ret i8 0 + +15: ; preds = %12, %1 + %16 = cleanuppad within none [] + cleanupret from %16 unwind label %17 + +17: ; preds = %15 + %18 = cleanuppad within none [] + store i32 0, ptr %0, align 4 + cleanupret from %18 unwind to caller +} + +attributes #0 = { presplitcoroutine } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"eh-asynch", i32 1} diff --git a/llvm/test/Transforms/Coroutines/pr148035_unwind_edges_not_having_same_dest.ll b/llvm/test/Transforms/Coroutines/pr148035_unwind_edges_not_having_same_dest.ll new file mode 100644 index 0000000000000..997346f5e3d0c --- /dev/null +++ b/llvm/test/Transforms/Coroutines/pr148035_unwind_edges_not_having_same_dest.ll @@ -0,0 +1,42 @@ +; In coro-split, this coroutine standard code reduced IR, produced using clang with async-exceptions +; crashed before fix because of the validation mismatch of Unwind edges out of a funclet pad must have the same unwind dest +; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Unwind edges out of a funclet pad must have the same unwind dest" + +; Function Attrs: presplitcoroutine +define i8 @"?resuming_on_new_thread@@YA?AUtask@@AEAVjthread@std@@@Z"() #0 personality ptr null { + %1 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %2 = call ptr @llvm.coro.begin(token %1, ptr null) + %3 = call token @llvm.coro.save(ptr null) + %4 = call i8 @llvm.coro.suspend(token none, i1 false) + invoke void @llvm.seh.try.begin() + to label %common.ret unwind label %5 + +common.ret: ; preds = %13, %7, %0 + ret i8 0 + +5: ; preds = %0 + %6 = catchswitch within none [label %7] unwind label %9 + +7: ; preds = %5 + %8 = catchpad within %6 [ptr null, i32 0, ptr null] + br label %common.ret + +9: ; preds = %5 + %10 = cleanuppad within none [] + invoke void @llvm.seh.scope.end() [ "funclet"(token %10) ] + to label %11 unwind label %13 + +11: ; preds = %9 + %12 = call i1 @llvm.coro.end(ptr null, i1 true, token none) [ "funclet"(token %10) ] + cleanupret from %10 unwind label %13 + +13: ; preds = %11, %9 + %14 = cleanuppad within none [] + br label %common.ret +} + +attributes #0 = { presplitcoroutine } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"eh-asynch", i32 1} diff --git a/llvm/test/Transforms/Coroutines/pr148035_unwind_edges_not_having_same_dest_2.ll b/llvm/test/Transforms/Coroutines/pr148035_unwind_edges_not_having_same_dest_2.ll new file mode 100644 index 0000000000000..4b6163f45a53c --- /dev/null +++ b/llvm/test/Transforms/Coroutines/pr148035_unwind_edges_not_having_same_dest_2.ll @@ -0,0 +1,43 @@ +; In coro-split, this coroutine standard code reduced IR, produced using clang with async-exceptions +; crashed before fix because of the validation mismatch of Unwind edges out of a funclet pad must have the same unwind dest +; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Unwind edges out of a funclet pad must have the same unwind dest" + +; Function Attrs: presplitcoroutine +define i1 @"?resuming_on_new_thread@@YA?AUtask@@AEAVjthread@std@@@Z"() #0 personality ptr null { + %1 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %2 = call ptr @llvm.coro.begin(token %1, ptr null) + %3 = call token @llvm.coro.save(ptr null) + %4 = call i8 @llvm.coro.suspend(token none, i1 false) + invoke void @llvm.seh.try.begin() + to label %common.ret unwind label %5 + +common.ret: ; preds = %11, %13, %7, %0 + %common.ret.op = phi i1 [ false, %11], [false, %13 ], [false, %7], [false, %0] + ret i1 %common.ret.op + +5: ; preds = %0 + %6 = catchswitch within none [label %7] unwind label %9 + +7: ; preds = %5 + %8 = catchpad within %6 [ptr null, i32 0, ptr null] + br label %common.ret + +9: ; preds = %5 + %10 = cleanuppad within none [] + invoke void @llvm.seh.scope.end() [ "funclet"(token %10) ] + to label %11 unwind label %13 + +11: ; preds = %9 + %12 = call i1 @llvm.coro.end(ptr null, i1 true, token none) [ "funclet"(token %10) ] + br label %common.ret + +13: ; preds = %9 + %14 = cleanuppad within none [] + br label %common.ret +} + +attributes #0 = { presplitcoroutine } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"eh-asynch", i32 1}