[LCG] Implement Tarjan's algorithm correctly this time. We have to walk

up the stack finishing the exploration of each entries children before we're finished in addition to accounting for their low-links. Added a unittest that really hammers home the need for this with interlocking cycles that would each appear distinct otherwise and crash or compute the wrong result. As part of this, nuke a stale fixme and bring the rest of the implementation still more closely in line with the original algorithm. llvm-svn: 206966
2024-12-03 08:51:43 +00:00 · 2014-04-23 10:31:17 +00:00 · 2014-04-23 10:31:17 +00:00 · ba4ce79281
commit ba4ce79281
parent ad48f73242
3 changed files with 100 additions and 34 deletions
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@ -381,7 +381,8 @@ private:
  /// \brief Helper to form a new SCC out of the top of a DFSStack-like
  /// structure.
  SCC *formSCCFromDFSStack(
-      SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack);
+      SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack,
+      SmallVectorImpl<std::pair<Node *, Node::iterator>>::iterator SCCBegin);

  /// \brief Retrieve the next node in the post-order SCC walk of the call graph.
  SCC *getNextSCCInPostOrder();
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@ -152,27 +152,26 @@ void LazyCallGraph::updateGraphPtrs() {
 }

 LazyCallGraph::SCC *LazyCallGraph::formSCCFromDFSStack(
-    SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack) {
+    SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack,
+    SmallVectorImpl<std::pair<Node *, Node::iterator>>::iterator SCCBegin) {
  // The tail of the stack is the new SCC. Allocate the SCC and pop the stack
  // into it.
  SCC *NewSCC = new (SCCBPA.Allocate()) SCC();

-  // Because we don't follow the strict Tarjan recursive formulation, walk
-  // from the top of the stack down, propagating the lowest link and stopping
-  // when the DFS number is the lowest link.
-  int LowestLink = DFSStack.back().first->LowLink;
-  do {
-    Node *SCCN = DFSStack.pop_back_val().first;
+  for (auto I = SCCBegin, E = DFSStack.end(); I != E; ++I) {
+    Node *SCCN = I->first;
+    assert(SCCN->LowLink >= SCCBegin->first->LowLink &&
+           "We cannot have a low link in an SCC lower than its root on the "
+           "stack!");
+
    SCCMap[&SCCN->getFunction()] = NewSCC;
    NewSCC->Nodes.push_back(SCCN);
-    LowestLink = std::min(LowestLink, SCCN->LowLink);
    bool Inserted =
        NewSCC->NodeSet.insert(&SCCN->getFunction());
    (void)Inserted;
    assert(Inserted && "Cannot have duplicates in the DFSStack!");
-  } while (!DFSStack.empty() && LowestLink <= DFSStack.back().first->DFSNumber);
-  assert(LowestLink == NewSCC->Nodes.back()->DFSNumber &&
-         "Cannot stop with a DFS number greater than the lowest link!");
+  }
+  DFSStack.erase(SCCBegin, DFSStack.end());

  // A final pass over all edges in the SCC (this remains linear as we only
  // do this once when we build the SCC) to connect it to the parent sets of
@ -209,36 +208,45 @@ LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() {
    DFSStack.push_back(std::make_pair(N, N->begin()));
  }

-  Node *N = DFSStack.back().first;
-  if (N->DFSNumber == 0) {
+  auto SI = DFSStack.rbegin();
+  if (SI->first->DFSNumber == 0) {
    // This node hasn't been visited before, assign it a DFS number and remove
    // it from the entry set.
-    N->LowLink = N->DFSNumber = NextDFSNumber++;
-    SCCEntryNodes.remove(&N->getFunction());
+    SI->first->LowLink = SI->first->DFSNumber = NextDFSNumber++;
+    SCCEntryNodes.remove(&SI->first->getFunction());
  }

-  for (auto I = DFSStack.back().second, E = N->end(); I != E; ++I) {
-    Node *ChildN = *I;
-    if (ChildN->DFSNumber == 0) {
-      // Mark that we should start at this child when next this node is the
-      // top of the stack. We don't start at the next child to ensure this
-      // child's lowlink is reflected.
-      // FIXME: I don't actually think this is required, and we could start
-      // at the next child.
-      DFSStack.back().second = I;
+  do {
+    Node *N = SI->first;
+    for (auto I = SI->second, E = N->end(); I != E; ++I) {
+      Node *ChildN = *I;
+      if (ChildN->DFSNumber == 0) {
+        // Mark that we should start at this child when next this node is the
+        // top of the stack. We don't start at the next child to ensure this
+        // child's lowlink is reflected.
+        SI->second = I;

-      // Recurse onto this node via a tail call.
-      DFSStack.push_back(std::make_pair(ChildN, ChildN->begin()));
-      return LazyCallGraph::getNextSCCInPostOrder();
+        // Recurse onto this node via a tail call.
+        DFSStack.push_back(std::make_pair(ChildN, ChildN->begin()));
+        return LazyCallGraph::getNextSCCInPostOrder();
+      }
+
+      // Track the lowest link of the childen, if any are still in the stack.
+      if (ChildN->LowLink < N->LowLink && !SCCMap.count(&ChildN->getFunction()))
+        N->LowLink = ChildN->LowLink;
    }
+    // No more children to process for this stack entry.
+    SI->second = N->end();

-    // Track the lowest link of the childen, if any are still in the stack.
-    if (ChildN->LowLink < N->LowLink && !SCCMap.count(&ChildN->getFunction()))
-      N->LowLink = ChildN->LowLink;
-  }
+    if (N->LowLink == N->DFSNumber)
+      // Form the new SCC out of the top of the DFS stack.
+      return formSCCFromDFSStack(DFSStack, std::prev(SI.base()));

-  // Form the new SCC out of the top of the DFS stack.
-  return formSCCFromDFSStack(DFSStack);
+    ++SI;
+  } while (SI != DFSStack.rend());
+
+  llvm_unreachable(
+      "We cannot reach the bottom of the stack without popping an SCC.");
 }

 char LazyCallGraphAnalysis::PassID;
--- a/unittests/Analysis/LazyCallGraphTest.cpp
+++ b/unittests/Analysis/LazyCallGraphTest.cpp
@ -248,4 +248,61 @@ TEST(LazyCallGraphTest, BasicGraphFormation) {
  EXPECT_EQ(CG.postorder_scc_end(), SCCI);
 }

+static Function &lookupFunction(Module &M, StringRef Name) {
+  for (Function &F : M)
+    if (F.getName() == Name)
+      return F;
+  report_fatal_error("Couldn't find function!");
+}
+
+TEST(LazyCallGraphTest, MultiArmSCC) {
+  // Two interlocking cycles. The really useful thing about this SCC is that it
+  // will require Tarjan's DFS to backtrack and finish processing all of the
+  // children of each node in the SCC.
+  std::unique_ptr<Module> M = parseAssembly(
+      "define void @a() {\n"
+      "entry:\n"
+      "  call void @b()\n"
+      "  call void @d()\n"
+      "  ret void\n"
+      "}\n"
+      "define void @b() {\n"
+      "entry:\n"
+      "  call void @c()\n"
+      "  ret void\n"
+      "}\n"
+      "define void @c() {\n"
+      "entry:\n"
+      "  call void @a()\n"
+      "  ret void\n"
+      "}\n"
+      "define void @d() {\n"
+      "entry:\n"
+      "  call void @e()\n"
+      "  ret void\n"
+      "}\n"
+      "define void @e() {\n"
+      "entry:\n"
+      "  call void @a()\n"
+      "  ret void\n"
+      "}\n");
+  LazyCallGraph CG(*M);
+
+  // Force the graph to be fully expanded.
+  auto SCCI = CG.postorder_scc_begin();
+  LazyCallGraph::SCC *SCC = *SCCI++;
+  EXPECT_EQ(CG.postorder_scc_end(), SCCI);
+
+  LazyCallGraph::Node *A = CG.lookup(lookupFunction(*M, "a"));
+  LazyCallGraph::Node *B = CG.lookup(lookupFunction(*M, "b"));
+  LazyCallGraph::Node *C = CG.lookup(lookupFunction(*M, "c"));
+  LazyCallGraph::Node *D = CG.lookup(lookupFunction(*M, "d"));
+  LazyCallGraph::Node *E = CG.lookup(lookupFunction(*M, "e"));
+  EXPECT_EQ(SCC, CG.lookupSCC(A->getFunction()));
+  EXPECT_EQ(SCC, CG.lookupSCC(B->getFunction()));
+  EXPECT_EQ(SCC, CG.lookupSCC(C->getFunction()));
+  EXPECT_EQ(SCC, CG.lookupSCC(D->getFunction()));
+  EXPECT_EQ(SCC, CG.lookupSCC(E->getFunction()));
+}
+
 }