diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index 5cd7fe1b576..783a5cc3c12 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -34,8 +34,15 @@ enum class StatepointFlags { None = 0, GCTransition = 1, ///< Indicates that this statepoint is a transition from ///< GC-aware code to code that is not GC-aware. + /// Mark the deopt arguments associated with the statepoint as only being + /// "live-in". By default, deopt arguments are "live-through". "live-through" + /// requires that the value be live on entry, on exit, and at any point + /// during the call. "live-in" only requires the value be available at the + /// start of the call. In particular, "live-in" values can be placed in + /// unused argument registers or other non-callee saved registers. + DeoptLiveIn = 2, - MaskAll = GCTransition ///< A bitmask that includes all valid flags. + MaskAll = 3 ///< A bitmask that includes all valid flags. }; class GCRelocateInst; diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index e996c89e681..a8d6847e93a 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -370,7 +370,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, /// Lower a single value incoming to a statepoint node. This value can be /// either a deopt value or a gc value, the handling is the same. We special /// case constants and allocas, then fall back to spilling if required. -static void lowerIncomingStatepointValue(SDValue Incoming, +static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { SDValue Chain = Builder.getRoot(); @@ -389,6 +389,14 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // relocate the address of the alloca itself?) 
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), Incoming.getValueType())); + } else if (LiveInOnly) { + // If this value is live in (not live-on-return, or live-through), we can + // treat it the same way patchpoint treats its "live in" values. We'll + // end up folding some of these into stack references, but they'll be + // handled by the register allocator. Note that we do not have the notion + // of a late use so these values might be placed in registers which are + // clobbered by the call. This is fine for live-in. + Ops.push_back(Incoming); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. We currently do not support @@ -439,19 +447,38 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, "non gc managed derived pointer found in statepoint"); } } + assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); } else { assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); } #endif + // Figure out what lowering strategy we're going to use for each part + // Note: It is conservatively correct to lower both "live-in" and "live-out" + // as "live-through". A "live-through" variable is one which is "live-in", + // "live-out", and live throughout the lifetime of the call (i.e. we can find + // it from any PC within the transitive callee of the statepoint). In + // particular, if the callee spills callee preserved registers we may not + // be able to find a value placed in that register during the call. This is + // fine for live-out, but not for live-through. If we were willing to make + // assumptions about the code generator producing the callee, we could + // potentially allow live-through values in callee saved registers. 
+ const bool LiveInDeopt = + SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; + + auto isGCValue = [&](const Value *V) { + return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V); + }; + // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. for (const Value *V : SI.DeoptState) { - reservePreviousStackSlotForValue(V, Builder); + if (!LiveInDeopt || isGCValue(V)) + reservePreviousStackSlotForValue(V, Builder); } for (unsigned i = 0; i < SI.Bases.size(); ++i) { reservePreviousStackSlotForValue(SI.Bases[i], Builder); @@ -468,7 +495,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // what type of values are contained within. for (const Value *V : SI.DeoptState) { SDValue Incoming = Builder.getValue(V); - lowerIncomingStatepointValue(Incoming, Ops, Builder); + const bool LiveInValue = LiveInDeopt && !isGCValue(V); + lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder); } // Finally, go ahead and lower all the gc arguments. There's no prefixed @@ -478,10 +506,12 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // (base[0], ptr[0], base[1], ptr[1], ...) 
for (unsigned i = 0; i < SI.Bases.size(); ++i) { const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); + lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false, + Ops, Builder); const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); + lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false, + Ops, Builder); } // If there are any explicit spill slots passed to the statepoint, record diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index a5d480bc4cd..1652e48a328 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -448,6 +448,11 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, StartIdx = PatchPointOpers(&MI).getVarIdx(); break; } + case TargetOpcode::STATEPOINT: { + // For statepoints, fold deopt and gc arguments, but not call arguments. + StartIdx = StatepointOpers(&MI).getVarIdx(); + break; + } default: llvm_unreachable("unexpected stackmap opcode"); } @@ -513,7 +518,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, MachineInstr *NewMI = nullptr; if (MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); if (NewMI) @@ -794,7 +800,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, int FrameIndex = 0; if ((MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) && + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) && isLoadFromStackSlot(LoadMI, FrameIndex)) { // Fold stackmap/patchpoint. 
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 9bcf8944660..b07d9c90e37 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1273,6 +1273,24 @@ public: }; } +static StringRef getDeoptLowering(CallSite CS) { + const char *DeoptLowering = "deopt-lowering"; + if (CS.hasFnAttr(DeoptLowering)) { + // FIXME: CallSite has a *really* confusing interface around attributes + // with values. + const AttributeSet &CSAS = CS.getAttributes(); + if (CSAS.hasAttribute(AttributeSet::FunctionIndex, + DeoptLowering)) + return CSAS.getAttribute(AttributeSet::FunctionIndex, + DeoptLowering).getValueAsString(); + Function *F = CS.getCalledFunction(); + assert(F && F->hasFnAttribute(DeoptLowering)); + return F->getFnAttribute(DeoptLowering).getValueAsString(); + } + return "live-through"; +} + + static void makeStatepointExplicitImpl(const CallSite CS, /* to replace */ const SmallVectorImpl<Value *> &BasePtrs, @@ -1314,6 +1332,14 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ if (SD.StatepointID) StatepointID = *SD.StatepointID; + // Pass through the requested lowering if any. The default is live-through. 
+ StringRef DeoptLowering = getDeoptLowering(CS); + if (DeoptLowering.equals("live-in")) + Flags |= uint32_t(StatepointFlags::DeoptLiveIn); + else { + assert(DeoptLowering.equals("live-through") && "Unsupported value!"); + } + Value *CallTarget = CS.getCalledValue(); if (Function *F = dyn_cast<Function>(CallTarget)) { if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) { diff --git a/test/CodeGen/X86/statepoint-live-in.ll b/test/CodeGen/X86/statepoint-live-in.ll new file mode 100644 index 00000000000..d69abd87424 --- /dev/null +++ b/test/CodeGen/X86/statepoint-live-in.ll @@ -0,0 +1,131 @@ +; RUN: llc -O3 < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +declare void @bar() #0 +declare void @baz() + +define void @test1(i32 %a) gc "statepoint-example" { +entry: +; We expect the argument to be passed in an extra register to bar +; CHECK-LABEL: test1 +; CHECK: pushq %rax +; CHECK-NEXT: Ltmp0: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq _bar + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a) + ret void +} + +define void @test2(i32 %a, i32 %b) gc "statepoint-example" { +entry: +; Because the first call clobbers esi, we have to move the values into +; new registers. Note that they stay in the registers for both calls. +; CHECK-LABEL: @test2 +; CHECK: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: callq _bar + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %a, i32 %b) + call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %b, i32 %a) + ret void +} + +define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) gc "statepoint-example" { +entry: +; TODO: We should have folded the reload into the statepoint. +; CHECK-LABEL: @test3 +; CHECK: movl 32(%rsp), %r10d +; CHECK-NEXT: movl 24(%rsp), %r11d +; CHECK-NEXT: movl 16(%rsp), %eax +; CHECK-NEXT: callq _bar + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 9, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) + ret void +} + +; This case just confirms that we don't crash when given more live values +; than registers. This is a case where we *have* to use a stack slot. +define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +entry: +; TODO: We should have folded the reload into the statepoint. +; CHECK-LABEL: test4 +; CHECK: pushq %r15 +; CHECK: pushq %r14 +; CHECK: pushq %r13 +; CHECK: pushq %r12 +; CHECK: pushq %rbx +; CHECK: pushq %rax +; CHECK: movl 128(%rsp), %r13d +; CHECK-NEXT: movl 120(%rsp), %r12d +; CHECK-NEXT: movl 112(%rsp), %r15d +; CHECK-NEXT: movl 104(%rsp), %r14d +; CHECK-NEXT: movl 96(%rsp), %ebp +; CHECK-NEXT: movl 88(%rsp), %ebx +; CHECK-NEXT: movl 80(%rsp), %r11d +; CHECK-NEXT: movl 72(%rsp), %r10d +; CHECK-NEXT: movl 64(%rsp), %eax +; CHECK-NEXT: callq _bar + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) + ret void +} + +; A live-through gc-value must be spilled even if it is also a live-in deopt +; value. For live-in, we could technically report the register copy, but from +; a code quality perspective it's better to reuse the required stack slot so +; as to put less stress on the register allocator for no benefit. +define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" { +entry: +; CHECK-LABEL: test5 +; CHECK: movq %rsi, (%rsp) +; CHECK-NEXT: callq _bar + %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p) + %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 9, i32 9) + ret i32 addrspace(1)* %p2 +} + +; Show the interaction of live-through spilling followed by live-in. +define void @test6(i32 %a) gc "statepoint-example" { +entry: +; TODO: We could have reused the previous spill slot at zero additional cost. +; CHECK-LABEL: test6 +; CHECK: movl %edi, %ebx +; CHECK: movl %ebx, 12(%rsp) +; CHECK-NEXT: callq _baz +; CHECK-NEXT: Ltmp30: +; CHECK-NEXT: callq _bar + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 %a) + call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a) + ret void +} + + +; CHECK: Ltmp1-_test1 +; CHECK: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short 5 +; CHECK-NEXT: .long 0 + +; CHECK: Ltmp7-_test2 +; CHECK: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .long 0 +; CHECK: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short 3 +; CHECK-NEXT: .long 0 +; CHECK: Ltmp8-_test2 +; CHECK: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short 3 +; CHECK-NEXT: .long 0 +; CHECK: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .long 0 + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) + + +attributes #0 = { "deopt-lowering"="live-in" } +attributes #1 = { "deopt-lowering"="live-through" } diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll b/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll new file mode 100644 index 00000000000..22ca5cad1da --- /dev/null +++ b/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll @@ -0,0 +1,23 @@ +; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s +; Check that the "deopt-lowering" function attribute gets transcoded into +; flags on the resulting statepoint + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +declare void @foo() +declare void @bar() "deopt-lowering"="live-in" +declare void @baz() "deopt-lowering"="live-through" + +define void @test1() gc "statepoint-example" { +; CHECK-LABEL: @test1( +; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 1, i32 57) +; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 42) +; CHECK: 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 13) + +entry: + call void @foo() [ "deopt"(i32 57) ] + call void @bar() [ "deopt"(i32 42) ] + call void @baz() [ "deopt"(i32 13) ] + ret void +}