Allow PPC B and BLR to be if-converted into some predicated forms

This enables us to form predicated branches (which are the same conditional
branches we had before) and also a larger set of predicated returns (including
instructions like bdnzlr which is a conditional return and loop-counter
decrement all in one).

At the moment, if conversion does not capture all possible opportunities. A
simple example is provided in early-ret2.ll, where if conversion forms one
predicated return, and then the PPCEarlyReturn pass picks up the other one. So,
at least for now, we'll keep both mechanisms.

llvm-svn: 179134
This commit is contained in:
Hal Finkel 2013-04-09 22:58:37 +00:00
parent 5bc0fb7e93
commit 8b05494b58
7 changed files with 297 additions and 0 deletions

View File

@ -83,8 +83,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
"bdnz $dst">;
}
let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
"bdzlr", BrB, []>;
def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
"bdnzlr", BrB, []>;
}
}
let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {

View File

@ -876,6 +876,143 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
return true;
}
bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
unsigned OpC = MI->getOpcode();
switch (OpC) {
default:
return false;
case PPC::BCC:
case PPC::BCLR:
case PPC::BDZLR:
case PPC::BDZLR8:
case PPC::BDNZLR:
case PPC::BDNZLR8:
return true;
}
}
bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator())
return false;
// Conditional branch is a special case.
if (MI->isBranch() && !MI->isBarrier())
return true;
return !isPredicated(MI);
}
bool PPCInstrInfo::PredicateInstruction(
MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
unsigned OpC = MI->getOpcode();
if (OpC == PPC::BLR) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
(isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
} else {
MI->setDesc(get(PPC::BCLR));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
}
return true;
} else if (OpC == PPC::B) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
} else {
MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
MI->RemoveOperand(0);
MI->setDesc(get(PPC::BCC));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg())
.addMBB(MBB);
}
return true;
}
return false;
}
bool PPCInstrInfo::SubsumesPredicate(
const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const {
assert(Pred1.size() == 2 && "Invalid PPC first predicate");
assert(Pred2.size() == 2 && "Invalid PPC second predicate");
if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
return false;
if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
return false;
PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
if (P1 == P2)
return true;
// Does P1 subsume P2, e.g. GE subsumes GT.
if (P1 == PPC::PRED_LE &&
(P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
return true;
if (P1 == PPC::PRED_GE &&
(P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
return true;
return false;
}
bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// Note: At the present time, the contents of Pred from this function is
// unused by IfConversion. This implementation follows ARM by pushing the
// CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
// predicate, instructions defining CTR or CTR8 are also included as
// predicate-defining instructions.
const TargetRegisterClass *RCs[] =
{ &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
&PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
bool Found = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (int c = 0; c < 2 && !Found; ++c) {
const TargetRegisterClass *RC = RCs[c];
for (TargetRegisterClass::iterator I = RC->begin(),
IE = RC->end(); I != IE; ++I) {
if ((MO.isRegMask() && MO.clobbersPhysReg(*I)) ||
(MO.isReg() && MO.isDef() && MO.getReg() == *I)) {
Pred.push_back(MO);
Found = true;
}
}
}
}
return Found;
}
bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
unsigned OpC = MI->getOpcode();
switch (OpC) {
default:
return false;
case PPC::B:
case PPC::BLR:
return true;
}
}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///

View File

@ -160,6 +160,53 @@ public:
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const;
// If conversion by predication (only supported by some branch instructions).
// All of the profitability checks always return true; it is always
// profitable to use the predicated branches.
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
const BranchProbability &Probability) const {
return true;
}
virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumT, unsigned ExtraT,
MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
const BranchProbability &Probability) const {
return true;
}
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles,
const BranchProbability
&Probability) const {
return true;
}
virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
return false;
}
// Predication support.
bool isPredicated(const MachineInstr *MI) const;
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
virtual
bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
virtual
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const;
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
virtual bool isPredicable(MachineInstr *MI) const;
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///

View File

@ -518,6 +518,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
"b${cond:cc}lr ${cond:reg}", BrB, []>;
let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
"bdzlr", BrB, []>;
def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
"bdnzlr", BrB, []>;
}
}
let Defs = [CTR], Uses = [CTR] in {

View File

@ -93,6 +93,7 @@ public:
virtual bool addPreRegAlloc();
virtual bool addILPOpts();
virtual bool addInstSelector();
virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // namespace
@ -123,6 +124,13 @@ bool PPCPassConfig::addInstSelector() {
return false;
}
bool PPCPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
return true;
}
bool PPCPassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createPPCEarlyReturnPass());

View File

@ -0,0 +1,63 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
%struct.lua_TValue.17.692 = type { %union.Value.16.691, i32 }
%union.Value.16.691 = type { %union.GCObject.15.690* }
%union.GCObject.15.690 = type { %struct.lua_State.14.689 }
%struct.lua_State.14.689 = type { %union.GCObject.15.690*, i8, i8, i8, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.global_State.10.685*, %struct.CallInfo.11.686*, i32*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.CallInfo.11.686*, %struct.CallInfo.11.686*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State.14.689*, %struct.lua_Debug.12.687*)*, %struct.lua_TValue.17.692, %struct.lua_TValue.17.692, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.lua_longjmp.13.688*, i64 }
%struct.global_State.10.685 = type { %struct.stringtable.0.675, i8* (i8*, i8*, i64, i64)*, i8*, i8, i8, i32, %union.GCObject.15.690*, %union.GCObject.15.690**, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.Mbuffer.1.676, i64, i64, i64, i64, i32, i32, i32 (%struct.lua_State.14.689*)*, %struct.lua_TValue.17.692, %struct.lua_State.14.689*, %struct.UpVal.3.678, [9 x %struct.Table.7.682*], [17 x %union.TString.9.684*] }
%struct.stringtable.0.675 = type { %union.GCObject.15.690**, i32, i32 }
%struct.Mbuffer.1.676 = type { i8*, i64, i64 }
%struct.UpVal.3.678 = type { %union.GCObject.15.690*, i8, i8, %struct.lua_TValue.17.692*, %union.anon.2.677 }
%union.anon.2.677 = type { %struct.lua_TValue.17.692 }
%struct.Table.7.682 = type { %union.GCObject.15.690*, i8, i8, i8, i8, %struct.Table.7.682*, %struct.lua_TValue.17.692*, %struct.Node.6.681*, %struct.Node.6.681*, %union.GCObject.15.690*, i32 }
%struct.Node.6.681 = type { %struct.lua_TValue.17.692, %union.TKey.5.680 }
%union.TKey.5.680 = type { %struct.anon.0.4.679 }
%struct.anon.0.4.679 = type { %union.Value.16.691, i32, %struct.Node.6.681* }
%union.TString.9.684 = type { %struct.anon.1.8.683 }
%struct.anon.1.8.683 = type { %union.GCObject.15.690*, i8, i8, i8, i32, i64 }
%struct.CallInfo.11.686 = type { %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, i32*, i32, i32 }
%struct.lua_Debug.12.687 = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
%struct.lua_longjmp.13.688 = type opaque
define void @lua_xmove(i32 signext %n) #0 {
entry:
br i1 undef, label %for.end, label %if.end
if.end: ; preds = %entry
br i1 undef, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %if.end
br label %for.body
for.body: ; preds = %for.body.for.body_crit_edge, %for.body.lr.ph
%0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
%tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
%1 = load i32* %tt, align 4, !tbaa !0
store i32 %1, i32* undef, align 4, !tbaa !0
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
for.body.for.body_crit_edge: ; preds = %for.body
%.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3
br label %for.body
for.end: ; preds = %for.body, %if.end, %entry
ret void
; CHECK: @lua_xmove
; CHECK: bnelr
; CHECK: bnelr
; CHECK: bdzlr
}
attributes #0 = { nounwind }
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
!3 = metadata !{metadata !"any pointer", metadata !1}

View File

@ -0,0 +1,26 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define void @_Z8example3iPiS_() #0 {
entry:
br i1 undef, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %entry
br i1 undef, label %while.end, label %while.body
while.body: ; preds = %while.body, %while.body.lr.ph
br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0
while.end: ; preds = %while.body, %while.body.lr.ph, %entry
ret void
; CHECK: @_Z8example3iPiS_
; CHECK: bnelr
; CHECK: bnelr
}
attributes #0 = { noinline nounwind }
!0 = metadata !{}