-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[ScheduleDAG] Fix and simplify the algorithm used for finding callseq_start #149692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-m68k @llvm/pr-subscribers-llvm-selectiondag Author: Min-Yih Hsu (mshockwave) ChangesScheduleDAGRR uses I guess this hasn't been a problem for nearly 15(!) years since the original algorithm was written because only ISAs that require lots of memory movements around function calls (i.e. pushing arguments to stack) are more likely to trigger. And X86 is probably the only ISA other than 68k that qualifies, but I guess X86 has a slightly different chain value arrangement in this case. Full diff: https://github.com/llvm/llvm-project/pull/149692.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index a570b71ecd28d..d45e0dbf45df5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -478,62 +478,61 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
}
}
-/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// findCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
-/// NestLevel and MaxNested are used in recursion to indcate the current level
-/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
-/// level seen so far.
-///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
-static SDNode *
-FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
- const TargetInstrInfo *TII) {
- while (true) {
- // For a TokenFactor, examine each operand. There may be multiple ways
- // to get to the CALLSEQ_BEGIN, but we need to find the path with the
- // most nesting in order to ensure that we find the corresponding match.
+static SDNode *findCallSeqStart(SDNode *CallSeqEnd,
+ const TargetInstrInfo *TII) {
+ SmallVector<SDNode *> DFSStack;
+ SmallPtrSet<SDNode *, 8> Visited;
+ unsigned CallSeqDepth = 0;
+
+ DFSStack.push_back(CallSeqEnd);
+ Visited.insert(CallSeqEnd);
+
+ while (!DFSStack.empty()) {
+ SDNode *N = DFSStack.back();
+
if (N->getOpcode() == ISD::TokenFactor) {
- SDNode *Best = nullptr;
- unsigned BestMaxNest = MaxNest;
for (const SDValue &Op : N->op_values()) {
- unsigned MyNestLevel = NestLevel;
- unsigned MyMaxNest = MaxNest;
- if (SDNode *New = FindCallSeqStart(Op.getNode(),
- MyNestLevel, MyMaxNest, TII))
- if (!Best || (MyMaxNest > BestMaxNest)) {
- Best = New;
- BestMaxNest = MyMaxNest;
- }
+ if (!Visited.insert(Op.getNode()).second)
+ continue;
+ DFSStack.push_back(Op.getNode());
+ break;
}
- assert(Best);
- MaxNest = BestMaxNest;
- return Best;
+ if (DFSStack.back() == N)
+ DFSStack.pop_back();
+ continue;
}
- // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+
+ // Not a TokenFactor, no need to keep it in the DFS stack.
+ DFSStack.pop_back();
+
if (N->isMachineOpcode()) {
if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
- ++NestLevel;
- MaxNest = std::max(MaxNest, NestLevel);
+ ++CallSeqDepth;
} else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
- assert(NestLevel != 0);
- --NestLevel;
- if (NestLevel == 0)
+ assert(CallSeqDepth);
+ if (--CallSeqDepth == 0)
return N;
}
}
- // Otherwise, find the chain and continue climbing.
- for (const SDValue &Op : N->op_values())
- if (Op.getValueType() == MVT::Other) {
- N = Op.getNode();
- goto found_chain_operand;
- }
- return nullptr;
- found_chain_operand:;
- if (N->getOpcode() == ISD::EntryToken)
- return nullptr;
+
+ // Find the chain and continue climbing.
+ // A SDNode should only have a single input chain.
+ auto OpValues = N->op_values();
+ auto ItChain = find_if(OpValues, [](const SDValue &Op) {
+ return Op.getValueType() == MVT::Other;
+ });
+ if (ItChain != OpValues.end() &&
+ (*ItChain)->getOpcode() != ISD::EntryToken &&
+ Visited.insert((*ItChain).getNode()).second)
+ DFSStack.push_back((*ItChain).getNode());
}
+
+ return nullptr;
}
/// Call ReleasePred for each predecessor, then update register live def/gen.
@@ -581,9 +580,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
if (Node->isMachineOpcode() &&
Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
- unsigned NestLevel = 0;
- unsigned MaxNest = 0;
- SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+ SDNode *N = findCallSeqStart(Node, TII);
assert(N && "Must find call sequence start");
SUnit *Def = &SUnits[N->getNodeId()];
diff --git a/llvm/test/CodeGen/M68k/PR146213.ll b/llvm/test/CodeGen/M68k/PR146213.ll
new file mode 100644
index 0000000000000..a2aca8927ce86
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/PR146213.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=m68k-linux | FileCheck %s
+
+define double @minimized() nounwind {
+; CHECK-LABEL: minimized:
+; CHECK: ; %bb.0: ; %start
+; CHECK-NEXT: suba.l #20, %sp
+; CHECK-NEXT: movem.l %a2, (16,%sp) ; 8-byte Folded Spill
+; CHECK-NEXT: move.l #0, (4,%sp)
+; CHECK-NEXT: move.l #0, (%sp)
+; CHECK-NEXT: jsr $0
+; CHECK-NEXT: suba.l %a2, %a2
+; CHECK-NEXT: move.l (%a2), (%sp)
+; CHECK-NEXT: move.l #0, (12,%sp)
+; CHECK-NEXT: move.l #0, (8,%sp)
+; CHECK-NEXT: move.l $4, (4,%sp)
+; CHECK-NEXT: jsr __muldf3
+; CHECK-NEXT: move.l %d0, (%a2)
+; CHECK-NEXT: move.l %d1, $4
+; CHECK-NEXT: move.l %a2, %d0
+; CHECK-NEXT: move.l %a2, %d1
+; CHECK-NEXT: movem.l (16,%sp), %a2 ; 8-byte Folded Reload
+; CHECK-NEXT: adda.l #20, %sp
+; CHECK-NEXT: rts
+start:
+ %_58 = call double null(double 0.000000e+00)
+ %_60 = load double, ptr null, align 8
+ %_57 = fmul double 0.000000e+00, %_60
+ store double %_57, ptr null, align 8
+ ret double 0.000000e+00
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Clean up*
I don't quite follow, could you point out the part you were commenting on? |
ScheduleDAGRR uses
FindCallSeqStart
to find the matchingcallseq_start
from acallseq_end
. It does this search using a combination of DFS-ish + heuristics based on maximum nested level of callseq_start & callseq_end on each search path. But it struggles on cases like #146213 in which search paths that resolved into the same nested level triggered an assertion.Fundamentally, I don't think heuristics used in the original algorithm is 100% correct: for a given callseq_end, if there are multiple callseq_start on the dependency chains that could fully match it -- even taking into the account of nested callseq_start/end -- then there is probably something wrong in the construction of the DAG. So searching different search paths for maximum nested level might not be necessary.
I simplify this function by visiting related nodes with proper DFS and matches callseq_start/end along the way.
I guess this hasn't been a problem for nearly 15(!) years since the original algorithm was written because only ISAs that require lots of memory movements around function calls (i.e. pushing arguments to stack) are more likely to trigger. And X86 is probably the only ISA other than 68k that qualifies, but I guess X86 has a slightly different chain value arrangement in this case.