Eliminate redundant CR moves on PPC32.

The 32-bit ABI requires CR bit 6 to be set if the call has fp arguments and
unset if it doesn't. The solution up to now was to insert a MachineNode to
set/unset the CR bit, which produces a CR vreg. This vreg was then copied
into CR bit 6. When the register allocator saw a bunch of these in the same
function, it allocated the set/unset CR bit in some random CR register (1
extra instruction) and then emitted CR moves before every vararg function
call, rather than just setting and unsetting CR bit 6 directly before every
vararg function call. This patch instead inserts a PPCcrset/PPCcrunset
instruction which are then matched by a dedicated instruction pattern.

Patch by Tobias von Koch.

llvm-svn: 162725
This commit is contained in:
Hal Finkel 2012-08-28 02:10:27 +00:00
parent caa4701e37
commit d28587407f
4 changed files with 60 additions and 8 deletions

View File

@ -517,6 +517,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
}
}
@ -2834,6 +2836,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
isTailCall, RegsToPass, Ops, NodeTys,
PPCSubTarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
@ -3131,14 +3137,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Set CR6 to true if this is a vararg call with floating args passed in
// registers.
if (isVarArg) {
SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
dl, MVT::i32), 0);
RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
@ -3148,6 +3146,14 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
// Set CR bit 6 to true if this is a vararg call with floating args passed in
// registers.
if (isVarArg) {
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
dl, DAG.getVTList(MVT::Other, MVT::Glue), Chain);
InFlag = Chain.getValue(1);
}
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);

View File

@ -174,6 +174,10 @@ namespace llvm {
/// operand #3 optional in flag
TC_RETURN,
/// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
CR6SET,
CR6UNSET,
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,

View File

@ -155,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
// Instructions to set/unset CR bit 6 for SVR4 vararg calls
def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
@ -1145,6 +1151,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
"crxor $dst, $dst, $dst", BrCR,
[]>;
let Defs = [CR1EQ], CRD = 6 in {
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
"creqv 6, 6, 6", BrCR,
[(PPCcr6set)]>;
def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
"crxor 6, 6, 6", BrCR,
[(PPCcr6unset)]>;
}
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {

View File

@ -0,0 +1,26 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
target triple = "powerpc-unknown-linux"
@.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1
define void @test(i32 %count) nounwind {
entry:
; CHECK: crxor 6, 6, 6
%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
%cmp2 = icmp sgt i32 %count, 0
br i1 %cmp2, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
; CHECK: crxor 6, 6, 6
%call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
%inc = add nsw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %count
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
declare i32 @printf(i8* nocapture, ...) nounwind