From c57bdbb244d0c2492ae28265a1f5f86fcf64daac Mon Sep 17 00:00:00 2001
From: Brian Gesiak
Date: Fri, 11 May 2018 03:12:28 +0000
Subject: [PATCH] [Coroutines] PR34897: Fix incorrect elisions

Summary:
https://bugs.llvm.org/show_bug.cgi?id=34897 demonstrates an incorrect
coroutine frame allocation elision in the coro-elide pass. The elision is
performed on the basis that the SSA variables from all llvm.coro.begin are
directly referenced in subsequent llvm.coro.destroy instructions.

However, this ignores the fact that the function may exit through paths that
do not run these destroy instructions. In the sample program from PR34897,
for example, the llvm.coro.destroy instruction is only executed in exception
handling code. When the coroutine function exits normally, llvm.coro.destroy
is not called. Eliding the allocation in this case causes a subsequent
reference to the coroutine handle from outside of the function to access
freed memory.

To fix the issue, when finding an llvm.coro.destroy for each llvm.coro.begin,
only consider llvm.coro.destroy that are executed along non-exceptional paths.

Test Plan:
1. Download the sample program from
   https://bugs.llvm.org/show_bug.cgi?id=34897, compile it with
   `clang++ -fcoroutines-ts -stdlib=libc++ -std=c++1z -O2`, and run it.
   It should print `"run1\ncheck1\nrun2\ncheck2"` and then exit successfully.
2. Compile https://godbolt.org/g/mCKfnr and confirm it is still optimized to
   a single instruction, 'return 1190'.
3. `check-llvm`

Reviewers: rsmith, GorNishanov, eric_niebler

Reviewed By: GorNishanov

Subscribers: andrewrk, lewissbaker, EricWF, llvm-commits

Differential Revision: https://reviews.llvm.org/D43242

llvm-svn: 332077
---
 lib/Transforms/Coroutines/CoroElide.cpp       | 48 ++++++++++++++-----
 test/Transforms/Coroutines/coro-heap-elide.ll | 33 +++++++++++++
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index 5935eae5918..dfe05c4b2a5 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -14,6 +14,7 @@
 #include "CoroInternal.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -35,8 +36,8 @@ struct Lowerer : coro::LowererBase {
   Lowerer(Module &M) : LowererBase(M) {}
 
   void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA);
-  bool shouldElide() const;
-  bool processCoroId(CoroIdInst *, AAResults &AA);
+  bool shouldElide(Function *F, DominatorTree &DT) const;
+  bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
 };
 
 } // end anonymous namespace
@@ -141,33 +142,54 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
   removeTailCallAttribute(Frame, AA);
 }
 
-bool Lowerer::shouldElide() const {
+bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
   // If no CoroAllocs, we cannot suppress allocation, so elision is not
   // possible.
   if (CoroAllocs.empty())
     return false;
 
   // Check that for every coro.begin there is a coro.destroy directly
-  // referencing the SSA value of that coro.begin. If the value escaped, then
-  // coro.destroy would have been referencing a memory location storing that
-  // value and not the virtual register.
+  // referencing the SSA value of that coro.begin along a non-exceptional path.
+  // If the value escaped, then coro.destroy would have been referencing a
+  // memory location storing that value and not the virtual register.
 
-  SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
+  // First gather all of the non-exceptional terminators for the function.
+  SmallPtrSet<Instruction *, 8> Terminators;
+  for (BasicBlock &B : *F) {
+    auto *TI = B.getTerminator();
+    if (TI->getNumSuccessors() == 0 && !TI->isExceptional() &&
+        !isa<UnreachableInst>(TI))
+      Terminators.insert(TI);
+  }
 
+  // Filter out the coro.destroy that lie along exceptional paths.
+  SmallPtrSet<CoroSubFnInst *, 8> DAs;
   for (CoroSubFnInst *DA : DestroyAddr) {
+    for (Instruction *TI : Terminators) {
+      if (DT.dominates(DA, TI)) {
+        DAs.insert(DA);
+        break;
+      }
+    }
+  }
+
+  // Find all the coro.begin referenced by coro.destroy along happy paths.
+  SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
+  for (CoroSubFnInst *DA : DAs) {
     if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame()))
       ReferencedCoroBegins.insert(CB);
     else
       return false;
   }
 
-  // If size of the set is the same as total number of CoroBegins, means we
-  // found a coro.free or coro.destroy mentioning a coro.begin and we can
+  // If size of the set is the same as total number of coro.begin, that means we
+  // found a coro.free or coro.destroy referencing each coro.begin, so we can
   // perform heap elision.
   return ReferencedCoroBegins.size() == CoroBegins.size();
 }
 
-bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
+bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
+                            DominatorTree &DT) {
   CoroBegins.clear();
   CoroAllocs.clear();
   CoroFrees.clear();
@@ -213,7 +235,7 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
 
   replaceWithConstant(ResumeAddrConstant, ResumeAddr);
 
-  bool ShouldElide = shouldElide();
+  bool ShouldElide = shouldElide(CoroId->getFunction(), DT);
 
   auto *DestroyAddrConstant = ConstantExpr::getExtractValue(
       Resumers,
@@ -293,14 +315,16 @@ struct CoroElide : FunctionPass {
       return Changed;
 
     AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 
     for (auto *CII : L->CoroIds)
-      Changed |= L->processCoroId(CII, AA);
+      Changed |= L->processCoroId(CII, AA, DT);
 
     return Changed;
   }
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
   }
   StringRef getPassName() const override { return "Coroutine Elision"; }
 };
diff --git a/test/Transforms/Coroutines/coro-heap-elide.ll b/test/Transforms/Coroutines/coro-heap-elide.ll
index 839f844fde8..5ce2b693bc5 100644
--- a/test/Transforms/Coroutines/coro-heap-elide.ll
+++ b/test/Transforms/Coroutines/coro-heap-elide.ll
@@ -81,6 +81,39 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: @callResume_PR34897_no_elision(
+define void @callResume_PR34897_no_elision(i1 %cond) {
+; CHECK-LABEL: entry:
+entry:
+; CHECK: call i8* @CustomAlloc(
+  %hdl = call i8* @f()
+; CHECK: tail call void @bar(
+  tail call void @bar(i8* %hdl)
+; CHECK: tail call void @bar(
+  tail call void @bar(i8* null)
+  br i1 %cond, label %if.then, label %if.else
+
+; CHECK-LABEL: if.then:
+if.then:
+; CHECK: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8*
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+  br label %return
+
+if.else:
+  br label %return
+
+; CHECK-LABEL: return:
+return:
+; CHECK: ret void
+  ret void
+}
+
 ; a coroutine start function (cannot elide heap alloc, due to second argument to
 ; coro.begin not pointint to coro.alloc)
 define i8* @f_no_elision() personality i8* null {