[RS4GC] Better codegen for deoptimize calls

Don't emit a gc.result for a statepoint lowered from
@llvm.experimental.deoptimize since the call into __llvm_deoptimize is
effectively noreturn.  Instead follow the corresponding gc.statepoint
with an "unreachable".

llvm-svn: 265485
This commit is contained in:
Sanjoy Das 2016-04-05 23:18:35 +00:00
parent 00178d3519
commit 685aab406c
2 changed files with 66 additions and 18 deletions

View File

@ -1278,6 +1278,9 @@ namespace {
class DeferredReplacement {
AssertingVH<Instruction> Old;
AssertingVH<Instruction> New;
bool IsDeoptimize = false;
DeferredReplacement() {}
public:
explicit DeferredReplacement(Instruction *Old, Instruction *New) :
@ -1285,18 +1288,40 @@ public:
assert(Old != New && "Not allowed!");
}
/// Builds the deferred-replacement record for a call to
/// @llvm.experimental.deoptimize.
///
/// Only \p Old is recorded and the instance is flagged as a deoptimize
/// replacement; no replacement instruction is stored.  In asserts-enabled
/// builds, \p Old must be a CallInst whose callee is the
/// experimental_deoptimize intrinsic.
static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
#ifndef NDEBUG
  auto *Callee = cast<CallInst>(Old)->getCalledFunction();
  const bool CallsDeoptimize =
      Callee && Callee->getIntrinsicID() == Intrinsic::experimental_deoptimize;
  assert(CallsDeoptimize &&
         "Only way to construct a deoptimize deferred replacement");
#endif

  DeferredReplacement Result;
  Result.IsDeoptimize = true;
  Result.Old = Old;
  return Result;
}
/// Does the task represented by this instance.
///
/// If a replacement instruction was recorded, every use of the old instruction
/// is redirected to it.  For a deoptimize replacement, the ReturnInst that
/// terminates the old instruction's block is swapped for an "unreachable".
/// Finally the old instruction is erased.  Both handles are reset to null
/// before anything is erased.
void doReplacement() {
  Instruction *OldI = Old;
  Instruction *NewI = New;

  assert(OldI != NewI && "Disallowed at construction?!");
  // The condition is parenthesized so the message string attaches to the whole
  // check rather than only to its right-hand operand.
  assert((!IsDeoptimize || !NewI) &&
         "Deoptimize instrinsics are not replaced!");

  // Release the value handles before erasing any instruction below.
  Old = nullptr;
  New = nullptr;

  if (NewI)
    OldI->replaceAllUsesWith(NewI);

  if (IsDeoptimize) {
    // Note: we've inserted instructions, so the call to llvm.deoptimize may
    // not necessarily be followed by the matching return.
    auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
    new UnreachableInst(RI->getContext(), RI);
    RI->eraseFromParent();
  }

  OldI->eraseFromParent();
}
};
@ -1330,6 +1355,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
Flags |= uint32_t(StatepointFlags::GCTransition);
TransitionArgs = TransitionBundle->Inputs;
}
bool IsDeoptimize = false;
StatepointDirectives SD =
parseStatepointDirectivesFromAttrs(CS.getAttributes());
@ -1348,7 +1374,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
SmallVector<Type *, 8> DomainTy;
for (Value *Arg : CallArgs)
DomainTy.push_back(Arg->getType());
auto *FTy = FunctionType::get(F->getReturnType(), DomainTy,
auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
/* isVarArg = */ false);
// Note: CallTarget can be a bitcast instruction of a symbol if there are
@ -1357,6 +1383,8 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// was doing when generating this kind of IR.
CallTarget =
F->getParent()->getOrInsertFunction("__llvm_deoptimize", FTy);
IsDeoptimize = true;
}
}
@ -1440,6 +1468,13 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
}
assert(Token && "Should be set in one of the above branches!");
if (IsDeoptimize) {
// If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
// transform the tail-call like structure to a call to a void function
// followed by unreachable to get better codegen.
Replacements.push_back(
DeferredReplacement::createDeoptimizeReplacement(CS.getInstruction()));
} else {
Token->setName("statepoint_token");
if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
StringRef Name =
@ -1457,6 +1492,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
} else {
Replacements.emplace_back(CS.getInstruction(), nullptr);
}
}
Result.StatepointToken = Token;

View File

@ -4,10 +4,12 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
declare i32 @llvm.experimental.deoptimize.i32(...)
declare void @llvm.experimental.deoptimize.isVoid(...)
define i32 @caller_0(i32 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: @caller_0(
; CHECK: @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @__llvm_deoptimize, i32 0
; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @__llvm_deoptimize, i32 0
; CHECK: unreachable
entry:
%v = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0, i32 addrspace(1)* %ptr) ]
ret i32 %v
@ -16,8 +18,18 @@ entry:
; Deoptimize call that passes ordinary arguments (i32 50 and %ptr) in addition
; to a "deopt" bundle holding i32 0.  The FileCheck patterns below expect a
; gc.statepoint whose call target is @__llvm_deoptimize bitcast to a
; two-argument function type, followed by an unreachable.
define i32 @caller_1(i32 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: @caller_1
; CHECK: @llvm.experimental.gc.statepoint.p0f_i32i32p1i32f(i64 2882400000, i32 0, i32 (i32, i32 addrspace(1)*)* bitcast (i32 ()* @__llvm_deoptimize to i32 (i32, i32 addrspace(1)*)*), i32 2, i32 0, i32 50, i32 addrspace(1)* %ptr
; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidi32p1i32f(i64 2882400000, i32 0, void (i32, i32 addrspace(1)*)* bitcast (void ()* @__llvm_deoptimize to void (i32, i32 addrspace(1)*)*), i32 2, i32 0, i32 50, i32 addrspace(1)* %ptr
; CHECK: unreachable
entry:
%v = call i32(...) @llvm.experimental.deoptimize.i32(i32 50, i32 addrspace(1)* %ptr) [ "deopt"(i32 0) ]
ret i32 %v
}
; Void-returning variant: calls @llvm.experimental.deoptimize.isVoid with no
; ordinary arguments and a "deopt" bundle of (i32 0, %ptr).  The FileCheck
; patterns below expect a gc.statepoint targeting void ()* @__llvm_deoptimize,
; followed by an unreachable.
define void @caller_2(i32 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: @caller_2(
; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @__llvm_deoptimize, i32 0
; CHECK: unreachable
entry:
call void(...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0, i32 addrspace(1)* %ptr) ]
ret void
}