[coroutines] Handle unwind edge splitting

Summary:
The rewritePHIs algorithm used when building the coroutine frame (CoroFrame) inserts a placeholder
```
%placeholder = phi [%val]
```
on every edge leading to a block that starts with a PHI node with multiple incoming edges,
so that if one of the incoming values is spilled and needs to be reloaded, we have a
place to insert the reload. We use the SplitEdge helper function to split the incoming edge.
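
For a regular (non-unwind) edge, the rewrite looks roughly like this sketch (block and value names are illustrative):
```
; Before: %bb starts with a PHI that has two incoming edges.
bb:
  %n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]

; After: each edge gets its own block with a single-input placeholder PHI,
; giving us a spot to insert a reload if the incoming value was spilled.
bb.from.entry:
  %n.pre.entry = phi i32 [ %n, %entry ]
  br label %bb
bb.from.loop:
  %n.pre.loop = phi i32 [ %inc, %loop ]
  br label %bb
bb:
  %n.val = phi i32 [ %n.pre.entry, %bb.from.entry ], [ %n.pre.loop, %bb.from.loop ]
```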

The SplitEdge function does not deal with unwind edges coming into a block with an EHPad.

This patch adds an ehAwareSplitEdge function that can correctly split the unwind edge.

For landing pads, we clone the landing pad into every edge block and replace the original
landing pad with a PHI node collecting the values from all incoming landing pads.
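
A sketch of the landing-pad case (names are illustrative; the test below shows the exact CHECK lines):
```
; Before: two invokes unwind to a single landing pad preceded by a PHI.
pad:
  %val = phi i32 [ 0, %invoke1 ], [ 1, %invoke2 ]
  %lp = landingpad { i8*, i32 }
          catch i8* null

; After: the landingpad is cloned into each edge block, and the original
; landingpad is replaced by a PHI merging the cloned pads.
pad.from.invoke1:
  %lp1 = landingpad { i8*, i32 }
          catch i8* null
  br label %pad
pad.from.invoke2:
  %lp2 = landingpad { i8*, i32 }
          catch i8* null
  br label %pad
pad:
  %val = phi i32 [ 0, %pad.from.invoke1 ], [ 1, %pad.from.invoke2 ]
  %lp = phi { i8*, i32 } [ %lp1, %pad.from.invoke1 ], [ %lp2, %pad.from.invoke2 ]
```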

For WinEH pads, we keep the original EHPad in place and insert a cleanuppad/cleanupret pair
in each edge block.
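
A sketch of the WinEH case (names are illustrative; the funclet tests below show the generated code):
```
; Before: two invokes unwind to a block beginning with a cleanuppad.
pad:
  %val = phi i32 [ %x, %invoke1 ], [ %y, %invoke2 ]
  %tok = cleanuppad within none []

; After: the original cleanuppad stays in place; each edge block gets a
; cleanuppad/cleanupret pair that forwards the unwind edge to it.
pad.from.invoke1:
  %cp1 = cleanuppad within none []
  cleanupret from %cp1 unwind label %pad
pad.from.invoke2:
  %cp2 = cleanuppad within none []
  cleanupret from %cp2 unwind label %pad
pad:
  %val = phi i32 [ %x, %pad.from.invoke1 ], [ %y, %pad.from.invoke2 ]
  %tok = cleanuppad within none []
```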

Reviewers: majnemer, rnk

Reviewed By: majnemer

Subscribers: EricWF, llvm-commits

Differential Revision: https://reviews.llvm.org/D31845

llvm-svn: 303172

@@ -177,7 +177,7 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
// consume. Note, that crossing coro.save also requires a spill, as any code
// between coro.save and coro.suspend may resume the coroutine and all of the
// state needs to be saved by that time.
auto markSuspendBlock = [&](IntrinsicInst* BarrierInst) {
auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
BasicBlock *SuspendBlock = BarrierInst->getParent();
auto &B = getBlockData(SuspendBlock);
B.Suspend = true;
@@ -495,6 +495,78 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
return FramePtr;
}
// Sets the unwind edge of an instruction to a particular successor.
static void setUnwindEdgeTo(TerminatorInst *TI, BasicBlock *Succ) {
if (auto *II = dyn_cast<InvokeInst>(TI))
II->setUnwindDest(Succ);
else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
CS->setUnwindDest(Succ);
else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
CR->setUnwindDest(Succ);
else
llvm_unreachable("unexpected terminator instruction");
}
// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
// block.
static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
BasicBlock *NewPred,
PHINode *LandingPadReplacement) {
unsigned BBIdx = 0;
for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
// We manually update the LandingPadReplacement PHINode and it is the last
// PHI Node. So, if we find it, we are done.
if (LandingPadReplacement == PN)
break;
// Reuse the previous value of BBIdx if it lines up. In cases where we
// have multiple phi nodes with *lots* of predecessors, this is a speed
// win because we don't have to scan the PHI looking for TIBB. This
// happens because the BB list of PHI nodes are usually in the same
// order.
if (PN->getIncomingBlock(BBIdx) != OldPred)
BBIdx = PN->getBasicBlockIndex(OldPred);
assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
PN->setIncomingBlock(BBIdx, NewPred);
}
}
// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
// specific handling.
static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
LandingPadInst *OriginalPad,
PHINode *LandingPadReplacement) {
auto *PadInst = Succ->getFirstNonPHI();
if (!LandingPadReplacement && !PadInst->isEHPad())
return SplitEdge(BB, Succ);
auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
setUnwindEdgeTo(BB->getTerminator(), NewBB);
updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
if (LandingPadReplacement) {
auto *NewLP = OriginalPad->clone();
auto *Terminator = BranchInst::Create(Succ, NewBB);
NewLP->insertBefore(Terminator);
LandingPadReplacement->addIncoming(NewLP, NewBB);
return NewBB;
}
Value *ParentPad = nullptr;
if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
ParentPad = FuncletPad->getParentPad();
else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
ParentPad = CatchSwitch->getParentPad();
else
llvm_unreachable("handling for other EHPads not implemented yet");
auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
return NewBB;
}
static void rewritePHIs(BasicBlock &BB) {
// For every incoming edge we will create a block holding all
// incoming values in a single PHI node.
@@ -502,7 +574,7 @@ static void rewritePHIs(BasicBlock &BB) {
// loop:
// %n.val = phi i32[%n, %entry], [%inc, %loop]
//
// It will create:
// It will create:
//
// loop.from.entry:
// %n.loop.pre = phi i32 [%n, %entry]
@@ -517,9 +589,22 @@ static void rewritePHIs(BasicBlock &BB) {
// TODO: Simplify PHINodes in the basic block to remove duplicate
// predecessors.
LandingPadInst *LandingPad = nullptr;
PHINode *ReplPHI = nullptr;
if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
// ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
// We replace the original landing pad with a PHINode that will collect the
// results from all of them.
ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
ReplPHI->takeName(LandingPad);
LandingPad->replaceAllUsesWith(ReplPHI);
// We will erase the original landing pad at the end of this function after
// ehAwareSplitEdge cloned it in the transition blocks.
}
SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
for (BasicBlock *Pred : Preds) {
auto *IncomingBB = SplitEdge(Pred, &BB);
auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
auto *PN = cast<PHINode>(&BB.front());
do {
@@ -531,7 +616,14 @@ static void rewritePHIs(BasicBlock &BB) {
InputV->addIncoming(V, Pred);
PN->setIncomingValue(Index, InputV);
PN = dyn_cast<PHINode>(PN->getNextNode());
} while (PN);
} while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
// the landing pad.
}
if (LandingPad) {
// Calls to ehAwareSplitEdge function cloned the original landing pad.
// No longer need it.
LandingPad->eraseFromParent();
}
}

@@ -0,0 +1,218 @@
; Check that we can handle edge splits leading into a landingpad
; RUN: opt < %s -coro-split -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: define internal fastcc void @f.resume(
define void @f(i1 %cond) "coroutine.presplit"="1" personality i32 0 {
entry:
%id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
%size = tail call i64 @llvm.coro.size.i64()
%alloc = call i8* @malloc(i64 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp, label %coro.ret [
i8 0, label %resume
i8 1, label %cleanup
]
resume:
br i1 %cond, label %invoke1, label %invoke2
invoke1:
invoke void @may_throw1()
to label %unreach unwind label %pad.with.phi
invoke2:
invoke void @may_throw2()
to label %unreach unwind label %pad.with.phi
; Verify that we cloned the landing pad on every edge and inserted a reload of the spilled value
; CHECK: pad.with.phi.from.invoke2:
; CHECK: %0 = landingpad { i8*, i32 }
; CHECK: catch i8* null
; CHECK: br label %pad.with.phi
; CHECK: pad.with.phi.from.invoke1:
; CHECK: %1 = landingpad { i8*, i32 }
; CHECK: catch i8* null
; CHECK: br label %pad.with.phi
; CHECK: pad.with.phi:
; CHECK: %val = phi i32 [ 0, %pad.with.phi.from.invoke1 ], [ 1, %pad.with.phi.from.invoke2 ]
; CHECK: %lp = phi { i8*, i32 } [ %0, %pad.with.phi.from.invoke2 ], [ %1, %pad.with.phi.from.invoke1 ]
; CHECK: %exn = extractvalue { i8*, i32 } %lp, 0
; CHECK: call i8* @__cxa_begin_catch(i8* %exn)
; CHECK: call void @use_val(i32 %val)
; CHECK: call void @__cxa_end_catch()
; CHECK: call void @free(i8* %vFrame)
; CHECK: ret void
pad.with.phi:
%val = phi i32 [ 0, %invoke1 ], [ 1, %invoke2 ]
%lp = landingpad { i8*, i32 }
catch i8* null
%exn = extractvalue { i8*, i32 } %lp, 0
call i8* @__cxa_begin_catch(i8* %exn)
call void @use_val(i32 %val)
call void @__cxa_end_catch()
br label %cleanup
cleanup: ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %coro.ret
coro.ret:
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
unreach:
unreachable
}
; CHECK-LABEL: define internal fastcc void @g.resume(
define void @g(i1 %cond, i32 %x, i32 %y) "coroutine.presplit"="1" personality i32 0 {
entry:
%id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
%size = tail call i64 @llvm.coro.size.i64()
%alloc = call i8* @malloc(i64 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp, label %coro.ret [
i8 0, label %resume
i8 1, label %cleanup
]
resume:
br i1 %cond, label %invoke1, label %invoke2
invoke1:
invoke void @may_throw1()
to label %unreach unwind label %pad.with.phi
invoke2:
invoke void @may_throw2()
to label %unreach unwind label %pad.with.phi
; Verify that we created cleanuppads on every edge and inserted a reload of the spilled value
; CHECK: pad.with.phi.from.invoke2:
; CHECK: %0 = cleanuppad within none []
; CHECK: %y.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 6
; CHECK: %y.reload = load i32, i32* %y.reload.addr
; CHECK: cleanupret from %0 unwind label %pad.with.phi
; CHECK: pad.with.phi.from.invoke1:
; CHECK: %1 = cleanuppad within none []
; CHECK: %x.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 5
; CHECK: %x.reload = load i32, i32* %x.reload.addr
; CHECK: cleanupret from %1 unwind label %pad.with.phi
; CHECK: pad.with.phi:
; CHECK: %val = phi i32 [ %x.reload, %pad.with.phi.from.invoke1 ], [ %y.reload, %pad.with.phi.from.invoke2 ]
; CHECK: %tok = cleanuppad within none []
; CHECK: call void @use_val(i32 %val)
; CHECK: cleanupret from %tok unwind to caller
pad.with.phi:
%val = phi i32 [ %x, %invoke1 ], [ %y, %invoke2 ]
%tok = cleanuppad within none []
call void @use_val(i32 %val)
cleanupret from %tok unwind to caller
cleanup: ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %coro.ret
coro.ret:
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
unreach:
unreachable
}
; CHECK-LABEL: define internal fastcc void @h.resume(
define void @h(i1 %cond, i32 %x, i32 %y) "coroutine.presplit"="1" personality i32 0 {
entry:
%id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
%size = tail call i64 @llvm.coro.size.i64()
%alloc = call i8* @malloc(i64 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp, label %coro.ret [
i8 0, label %resume
i8 1, label %cleanup
]
resume:
br i1 %cond, label %invoke1, label %invoke2
invoke1:
invoke void @may_throw1()
to label %coro.ret unwind label %pad.with.phi
invoke2:
invoke void @may_throw2()
to label %coro.ret unwind label %pad.with.phi
; Verify that we created cleanuppads on every edge and inserted a reload of the spilled value
; CHECK: pad.with.phi.from.invoke2:
; CHECK: %0 = cleanuppad within none []
; CHECK: %y.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 6
; CHECK: %y.reload = load i32, i32* %y.reload.addr
; CHECK: cleanupret from %0 unwind label %pad.with.phi
; CHECK: pad.with.phi.from.invoke1:
; CHECK: %1 = cleanuppad within none []
; CHECK: %x.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 5
; CHECK: %x.reload = load i32, i32* %x.reload.addr
; CHECK: cleanupret from %1 unwind label %pad.with.phi
; CHECK: pad.with.phi:
; CHECK: %val = phi i32 [ %x.reload, %pad.with.phi.from.invoke1 ], [ %y.reload, %pad.with.phi.from.invoke2 ]
; CHECK: %switch = catchswitch within none [label %catch] unwind to caller
pad.with.phi:
%val = phi i32 [ %x, %invoke1 ], [ %y, %invoke2 ]
%switch = catchswitch within none [label %catch] unwind to caller
catch: ; preds = %catch.dispatch
%pad = catchpad within %switch [i8* null, i32 64, i8* null]
call void @use_val(i32 %val)
catchret from %pad to label %coro.ret
cleanup: ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %coro.ret
coro.ret:
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
}
; Function Attrs: argmemonly nounwind readonly
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
declare noalias i8* @malloc(i64)
declare i64 @llvm.coro.size.i64()
declare i8* @llvm.coro.begin(token, i8* writeonly)
; Function Attrs: nounwind
declare token @llvm.coro.save(i8*)
declare i8 @llvm.coro.suspend(token, i1)
; Function Attrs: argmemonly nounwind
declare void @may_throw1()
declare void @may_throw2()
declare i8* @__cxa_begin_catch(i8*)
declare void @use_val(i32)
declare void @__cxa_end_catch()
; Function Attrs: nounwind
declare i1 @llvm.coro.end(i8*, i1)
declare void @free(i8*)
declare i8* @llvm.coro.free(token, i8* nocapture readonly)