[PowerPC] Convert r+r instructions to r+i (pre and post RA)

This patch adds the necessary infrastructure to convert instructions that
take two register operands to those that take a register and immediate if
the necessary operand is produced by a load-immediate. Furthermore, it uses
this infrastructure to perform such conversions twice - first at MachineSSA
and then pre-emit.

There are a number of reasons we may end up with opportunities for this
transformation, including but not limited to:
- X-Form instructions chosen since the exact offset isn't available at ISEL time
- Atomic instructions with constant operands (we will add patterns for this
  in the future)
- Tail duplication may duplicate code where one block contains this redundancy
- When emitting compare-free code in PPCDAGToDAGISel, we don't handle constant
  comparands specially

Furthermore, this patch moves the initialization of PPCMIPeepholePass so that
it can be used for MIR tests.

llvm-svn: 320791
This commit is contained in:
Nemanja Ivanovic 2017-12-15 07:27:53 +00:00
parent 14d377e2bb
commit b8102d2cc0
17 changed files with 7699 additions and 90 deletions

View File

@ -43,6 +43,7 @@ add_llvm_target(PowerPCCodeGen
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
PPCExpandISEL.cpp
PPCPreEmitPeephole.cpp
)
add_subdirectory(AsmParser)

View File

@ -50,6 +50,7 @@ namespace llvm {
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
@ -59,7 +60,9 @@ namespace llvm {
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);
void initializePPCTLSDynamicCallPass(PassRegistry &);
void initializePPCMIPeepholePass(PassRegistry&);
extern char &PPCVSXFMAMutateID;
namespace PPCII {

View File

@ -194,6 +194,11 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<

View File

@ -51,6 +51,10 @@ STATISTIC(NumStoreSPILLVSRRCAsVec,
STATISTIC(NumStoreSPILLVSRRCAsGpr,
"Number of spillvsrrc spilled to stack as gpr");
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
STATISTIC(CmpIselsConverted,
"Number of ISELs that depend on comparison of constants converted");
STATISTIC(MissedConvertibleImmediateInstrs,
"Number of compare-immediate instructions fed by constants");
static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
@ -2147,6 +2151,816 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
unsigned PPCInstrInfo::lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI) {
while (true) {
MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
unsigned CopySrcReg;
if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
else {
assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
}
}
// Essentially a compile-time implementation of a compare->isel sequence.
// It takes two constants to compare, along with the true/false registers
// and the comparison type (as a subreg to a CR field) and returns one
// of the true/false registers, depending on the comparison results.
// Returns PPC::NoRegister for compare opcodes it does not understand.
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
                          unsigned TrueReg, unsigned FalseReg,
                          unsigned CRSubReg) {
  // Signed compares assume the immediates are already sign-extended;
  // unsigned compares reinterpret the same bits as uint64_t.
  bool IsSigned = CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI;
  bool IsUnsigned = CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI;
  if (!IsSigned && !IsUnsigned)
    return PPC::NoRegister;

  bool Taken;
  switch (CRSubReg) {
  default:
    llvm_unreachable("Unknown integer comparison type.");
  case PPC::sub_lt:
    Taken = IsSigned ? (Imm1 < Imm2) : ((uint64_t)Imm1 < (uint64_t)Imm2);
    break;
  case PPC::sub_gt:
    Taken = IsSigned ? (Imm1 > Imm2) : ((uint64_t)Imm1 > (uint64_t)Imm2);
    break;
  case PPC::sub_eq:
    // Equality is sign-agnostic.
    Taken = Imm1 == Imm2;
    break;
  }
  return Taken ? TrueReg : FalseReg;
}
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
                                      const LoadImmediateInfo &LII) const {
  // An ANDIo/ANDIo8 keeps its register input (operand 1) in addition to the
  // def; a plain LI/LI8 keeps only the def (operand 0). Strip everything
  // after the last operand to keep, highest index first so indices remain
  // valid as we remove.
  const int LastOpToKeep = LII.SetCR ? 1 : 0;
  int Idx = MI.getNumOperands() - 1;
  while (Idx > LastOpToKeep) {
    MI.RemoveOperand(Idx);
    --Idx;
  }

  // Pick the replacement opcode: a record-form AND-immediate when CR0 must
  // be set, otherwise a load-immediate.
  unsigned NewOpc;
  if (LII.SetCR)
    NewOpc = LII.Is64Bit ? PPC::ANDIo8 : PPC::ANDIo;
  else
    NewOpc = LII.Is64Bit ? PPC::LI8 : PPC::LI;
  MI.setDesc(get(NewOpc));

  // Append the immediate operand.
  MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(LII.Imm);
}
// Scan MI's register operands for one whose value is produced by a
// load-immediate (LI/LI8). On success, set ConstOp to that operand's index
// and return the defining LI/LI8; otherwise return nullptr. Pre-RA (SSA),
// definitions are found through MRI (looking through copy-like chains);
// post-RA, only a backwards scan within MI's own basic block is done.
// SeenIntermediateUse is set (in the post-RA path) when another read of the
// register occurs between the LI and MI, i.e. the LI is not trivially dead.
// NOTE(review): SeenIntermediateUse is only assigned on the post-RA path;
// callers should initialize it before calling — confirm against call sites.
MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
unsigned &ConstOp,
bool &SeenIntermediateUse) const {
ConstOp = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
// If we're in SSA, get the defs through the MRI. Otherwise, only look
// within the basic block to see if the register is defined using an LI/LI8.
if (MRI->isSSA()) {
// Operand 0 is the def; only the uses (1..e) can be fed by a constant.
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
if (!MI.getOperand(i).isReg())
continue;
unsigned Reg = MI.getOperand(i).getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Look through COPY/SUBREG_TO_REG chains to the originating vreg.
unsigned TrueReg = lookThruCopyLike(Reg, MRI);
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
ConstOp = i;
break;
}
}
}
} else {
// Looking back through the definition for each operand could be expensive,
// so exit early if this isn't an instruction that either has an immediate
// form or is already an immediate form that we can handle.
ImmInstrInfo III;
unsigned Opc = MI.getOpcode();
bool ConvertibleImmForm =
Opc == PPC::CMPWI || Opc == PPC::CMPLWI ||
Opc == PPC::CMPDI || Opc == PPC::CMPLDI ||
Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
Opc == PPC::ORI || Opc == PPC::ORI8 ||
Opc == PPC::XORI || Opc == PPC::XORI8 ||
Opc == PPC::RLDICL || Opc == PPC::RLDICLo ||
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
if (!instrHasImmForm(MI, III) && !ConvertibleImmForm)
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
if ((Opc == PPC::OR || Opc == PPC::OR8) &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
return nullptr;
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
MachineOperand &MO = MI.getOperand(i);
SeenIntermediateUse = false;
if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
// Walk backwards from the instruction just above MI to the block start.
MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
It++;
unsigned Reg = MI.getOperand(i).getReg();
// Is this register defined by a load-immediate in this block?
for ( ; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo())) {
if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) {
ConstOp = i;
return &*It;
} else
// Any other redefinition kills the search for this operand.
break;
} else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
// we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
}
}
}
}
return ConstOp == ~0U ? nullptr : DefMI;
}
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate, convert it to the immediate form if the
// constant is in range. Returns true if MI was rewritten. If KilledDef is
// non-null and the feeding load-immediate becomes dead as a result,
// *KilledDef is set to it so the caller can erase it.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ConstantOperand = ~0U;
  MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand,
                                         SeenIntermediateUse);
  if (!DefMI || !DefMI->getOperand(1).isImm())
    return false;
  assert(ConstantOperand < MI.getNumOperands() &&
         "The constant operand needs to be valid at this point");

  int64_t Immediate = DefMI->getOperand(1).getImm();
  // Sign-extend the LI's 16-bit immediate to 64-bits.
  int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
                    (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;

  // The load-immediate is only safe to delete if its result is killed here
  // and no other instruction read it between the def and MI.
  if (KilledDef && MI.getOperand(ConstantOperand).isKill() &&
      !SeenIntermediateUse)
    *KilledDef = DefMI;

  // If this is a reg+reg instruction that has a reg+imm form, convert it now.
  ImmInstrInfo III;
  if (instrHasImmForm(MI, III))
    return transformToImmForm(MI, III, ConstantOperand, SExtImm);

  bool ReplaceWithLI = false;
  bool Is64BitLI = false;
  int64_t NewImm = 0;
  bool SetCR = false;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default: return false;

  // FIXME: Any branches conditional on such a comparison can be made
  // unconditional. At this time, this happens too infrequently to be worth
  // the implementation effort, but if that ever changes, we could convert
  // such a pattern here.
  case PPC::CMPWI:
  case PPC::CMPLWI:
  case PPC::CMPDI:
  case PPC::CMPLDI: {
    // Doing this post-RA would require dataflow analysis to reliably find
    // uses of the CR register set by the compare.
    if (PostRA)
      return false;
    // If a compare-immediate is fed by an immediate and is itself an input
    // of an ISEL (the most common case), convert the ISEL into a COPY of the
    // correct register (the comparison can be folded at compile time).
    bool Changed = false;
    unsigned DefReg = MI.getOperand(0).getReg();
    int64_t Comparand = MI.getOperand(2).getImm();
    int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ?
                            (Comparand | 0xFFFFFFFFFFFF0000) : Comparand;
    for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
      unsigned UseOpc = CompareUseMI.getOpcode();
      if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
        continue;
      unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
      unsigned TrueReg = CompareUseMI.getOperand(1).getReg();
      unsigned FalseReg = CompareUseMI.getOperand(2).getReg();
      unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg,
                                     FalseReg, CRSubReg);
      if (RegToCopy == PPC::NoRegister)
        continue;
      // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
      if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
        CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
        CompareUseMI.getOperand(1).ChangeToImmediate(0);
        CompareUseMI.RemoveOperand(3);
        CompareUseMI.RemoveOperand(2);
        continue;
      }
      DEBUG(dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
      DEBUG(DefMI->dump(); MI.dump(); CompareUseMI.dump());
      DEBUG(dbgs() << "Is converted to:\n");
      // Convert to copy and remove unneeded operands.
      CompareUseMI.setDesc(get(PPC::COPY));
      CompareUseMI.RemoveOperand(3);
      CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
      CmpIselsConverted++;
      Changed = true;
      DEBUG(CompareUseMI.dump());
    }
    if (Changed)
      return true;
    // This may end up incremented multiple times since this function is
    // called during a fixed-point transformation, but it is only meant to
    // indicate the presence of this opportunity.
    MissedConvertibleImmediateInstrs++;
    return false;
  }

  // Immediate forms - may simply be convertable to an LI.
  case PPC::ADDI:
  case PPC::ADDI8: {
    // Does the sum fit in a 16-bit signed field?
    int64_t Addend = MI.getOperand(2).getImm();
    if (isInt<16>(Addend + SExtImm)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ADDI8;
      NewImm = Addend + SExtImm;
      break;
    }
    // Bug fix: this case previously fell through into the RLDICL handling
    // below, which reads operand 3 — an operand an ADDI does not have.
    return false;
  }
  case PPC::RLDICL:
  case PPC::RLDICLo:
  case PPC::RLDICL_32:
  case PPC::RLDICL_32_64: {
    // Use APInt's rotate function.
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    // RLDICLo is also a 64-bit rotate; only the _32 variants rotate a 32-bit
    // value (previously RLDICLo was incorrectly treated as 32-bit).
    APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ? 64 : 32,
                SExtImm, true);
    InVal = InVal.rotl(SH);
    // MB == 0 keeps all 64 bits; special-case it since a 64-bit shift by 64
    // is undefined behavior. Use 1ULL so the shift is 64-bit on all hosts.
    uint64_t Mask = MB == 0 ? -1ULL : (1ULL << (63 - MB + 1)) - 1;
    InVal &= Mask;
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDIo which won't sign extend, so that's safe.
    if (isUInt<15>(InVal.getSExtValue()) ||
        (Opc == PPC::RLDICLo && isUInt<16>(InVal.getSExtValue()))) {
      ReplaceWithLI = true;
      Is64BitLI = Opc != PPC::RLDICL_32;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLDICLo;
      break;
    }
    return false;
  }
  case PPC::RLWINM:
  case PPC::RLWINM8:
  case PPC::RLWINMo:
  case PPC::RLWINM8o: {
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    int64_t ME = MI.getOperand(4).getImm();
    // A wrap-around mask (MB > ME) cannot be expressed by the contiguous
    // mask computed below; the old code silently computed a wrong (empty)
    // mask for such inputs, so bail out instead.
    if (MB > ME)
      return false;
    APInt InVal(32, SExtImm, true);
    InVal = InVal.rotl(SH);
    // Set the bits ( MB + 32 ) to ( ME + 32 ). Use 1ULL so the MB == 0 case
    // (a shift by 32) is well-defined.
    uint64_t Mask = ((1ULL << (32 - MB)) - 1) & ~((1ULL << (31 - ME)) - 1);
    InVal &= Mask;
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDIo which won't sign extend, so that's safe.
    bool ValueFits = isUInt<15>(InVal.getSExtValue());
    ValueFits |= ((Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o) &&
                  isUInt<16>(InVal.getSExtValue()));
    if (ValueFits) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o;
      break;
    }
    return false;
  }
  case PPC::ORI:
  case PPC::ORI8:
  case PPC::XORI:
  case PPC::XORI8: {
    int64_t LogicalImm = MI.getOperand(2).getImm();
    int64_t Result = 0;
    if (Opc == PPC::ORI || Opc == PPC::ORI8)
      Result = LogicalImm | SExtImm;
    else
      Result = LogicalImm ^ SExtImm;
    if (isInt<16>(Result)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
      NewImm = Result;
      break;
    }
    return false;
  }
  }

  if (ReplaceWithLI) {
    DEBUG(dbgs() << "Replacing instruction:\n");
    DEBUG(MI.dump());
    DEBUG(dbgs() << "Fed by:\n");
    DEBUG(DefMI->dump());
    LoadImmediateInfo LII;
    LII.Imm = NewImm;
    LII.Is64Bit = Is64BitLI;
    LII.SetCR = SetCR;
    // If we're setting the CR, the original load-immediate must be kept (as
    // an operand to ANDIo/ANDI8o).
    if (KilledDef && SetCR)
      *KilledDef = nullptr;
    replaceInstrWithLI(MI, LII);
    DEBUG(dbgs() << "With:\n");
    DEBUG(MI.dump());
    return true;
  }
  return false;
}
// Determine whether the reg+reg instruction MI has an equivalent reg+imm
// form. If so, fill III with everything needed to convert it — the new
// opcode, which operand is replaced by the immediate, signedness/width/
// alignment constraints on the immediate, whether the operands may be
// commuted, and where R0/X0 reads as zero — and return true. Returns false
// if no immediate form exists (or a required subtarget feature is missing).
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
ImmInstrInfo &III) const {
unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. A marked exception
// are the update form loads/stores for which a constant operand 2 would need
// to turn into a displacement and move operand 1 to the operand 2 position.
III.ImmOpNo = 2;
III.ConstantOpNo = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
switch (Opc) {
default: return false;
case PPC::ADD4:
case PPC::ADD8:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
// ADDI reads operand 1 as zero when it is R0/X0.
III.ZeroIsSpecialNew = 1;
III.IsCommutative = true;
III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
break;
case PPC::ADDC:
case PPC::ADDC8:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
case PPC::ADDCo:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
III.ImmOpcode = PPC::ADDICo;
break;
case PPC::SUBFC:
case PPC::SUBFC8:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
break;
case PPC::CMPW:
case PPC::CMPD:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
break;
case PPC::CMPLW:
case PPC::CMPLD:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
break;
// Logical operations: unsigned 16-bit immediates, fully commutative.
case PPC::ANDo:
case PPC::AND8o:
case PPC::OR:
case PPC::OR8:
case PPC::XOR:
case PPC::XOR8:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::ANDo: III.ImmOpcode = PPC::ANDIo; break;
case PPC::AND8o: III.ImmOpcode = PPC::ANDIo8; break;
case PPC::OR: III.ImmOpcode = PPC::ORI; break;
case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
}
break;
// Rotates and shifts: the variable amount becomes an immediate rotate/shift.
case PPC::RLWNM:
case PPC::RLWNM8:
case PPC::RLWNMo:
case PPC::RLWNM8o:
case PPC::RLDCL:
case PPC::RLDCLo:
case PPC::RLDCR:
case PPC::RLDCRo:
case PPC::SLW:
case PPC::SLW8:
case PPC::SLWo:
case PPC::SLW8o:
case PPC::SRW:
case PPC::SRW8:
case PPC::SRWo:
case PPC::SRW8o:
case PPC::SRAW:
case PPC::SRAWo:
case PPC::SLD:
case PPC::SLDo:
case PPC::SRD:
case PPC::SRDo:
case PPC::SRAD:
case PPC::SRADo:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
// This isn't actually true, but the instructions ignore any of the
// upper bits, so any immediate loaded with an LI is acceptable.
III.ImmWidth = 16;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRAW: III.ImmOpcode = PPC::SRAWI; break;
case PPC::SRAWo: III.ImmOpcode = PPC::SRAWIo; break;
case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::SRAD: III.ImmOpcode = PPC::SRADI; break;
case PPC::SRADo: III.ImmOpcode = PPC::SRADIo; break;
}
break;
// Loads and stores:
// X-form -> D-form: the index register (operand 2) becomes a displacement,
// and R0/X0-reads-as-zero moves from the base (operand 1) to the D-form
// base (operand 2). DS-form (LWA/LD/STD) displacements must be 4-aligned.
case PPC::LBZX:
case PPC::LBZX8:
case PPC::LHZX:
case PPC::LHZX8:
case PPC::LHAX:
case PPC::LHAX8:
case PPC::LWZX:
case PPC::LWZX8:
case PPC::LWAX:
case PPC::LDX:
case PPC::LFSX:
case PPC::LFDX:
case PPC::STBX:
case PPC::STBX8:
case PPC::STHX:
case PPC::STHX8:
case PPC::STWX:
case PPC::STWX8:
case PPC::STDX:
case PPC::STFSX:
case PPC::STFDX:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
III.ImmOpNo = 1;
III.ConstantOpNo = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
case PPC::LWAX:
III.ImmOpcode = PPC::LWA;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
case PPC::STBX: III.ImmOpcode = PPC::STB; break;
case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
case PPC::STHX: III.ImmOpcode = PPC::STH; break;
case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
case PPC::STWX: III.ImmOpcode = PPC::STW; break;
case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
case PPC::STDX:
III.ImmOpcode = PPC::STD;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
}
break;
// Update-form loads/stores: the tied base update shifts every operand by
// one, so the displacement lands in operand 2 and the index in operand 3.
case PPC::LBZUX:
case PPC::LBZUX8:
case PPC::LHZUX:
case PPC::LHZUX8:
case PPC::LHAUX:
case PPC::LHAUX8:
case PPC::LWZUX:
case PPC::LWZUX8:
case PPC::LDUX:
case PPC::LFSUX:
case PPC::LFDUX:
case PPC::STBUX:
case PPC::STBUX8:
case PPC::STHUX:
case PPC::STHUX8:
case PPC::STWUX:
case PPC::STWUX8:
case PPC::STDUX:
case PPC::STFSUX:
case PPC::STFDUX:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 2;
III.ZeroIsSpecialNew = 3;
III.IsCommutative = false;
III.ImmOpNo = 2;
III.ConstantOpNo = 3;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
case PPC::LDUX:
III.ImmOpcode = PPC::LDU;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
case PPC::STDUX:
III.ImmOpcode = PPC::STDU;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
}
break;
// Power9 only.
case PPC::LXVX:
case PPC::LXSSPX:
case PPC::LXSDX:
case PPC::STXVX:
case PPC::STXSSPX:
case PPC::STXSDX:
if (!Subtarget.hasP9Vector())
return false;
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
III.ImmOpNo = 1;
III.ConstantOpNo = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LXVX:
III.ImmOpcode = PPC::LXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::LXSSPX:
III.ImmOpcode = PPC::LXSSP;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::LXSDX:
III.ImmOpcode = PPC::LXSD;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STXVX:
III.ImmOpcode = PPC::STXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::STXSSPX:
III.ImmOpcode = PPC::STXSSP;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STXSDX:
III.ImmOpcode = PPC::STXSD;
III.ImmMustBeMultipleOf = 4;
break;
}
break;
}
return true;
}
// Utility function for swapping two arbitrary operands of an instruction.
// Both operands are removed (highest index first so the lower index stays
// valid) and re-added in swapped positions; any operands between them are
// buffered and re-added unchanged.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
assert(Op1 != Op2 && "Cannot swap operand with itself.");
unsigned MaxOp = std::max(Op1, Op2);
unsigned MinOp = std::min(Op1, Op2);
MachineOperand MOp1 = MI.getOperand(MinOp);
MachineOperand MOp2 = MI.getOperand(MaxOp);
MI.RemoveOperand(std::max(Op1, Op2));
MI.RemoveOperand(std::min(Op1, Op2));
// If the operands we are swapping are the two at the end (the common case)
// we can just remove both and add them in the opposite order.
if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
MI.addOperand(MOp2);
MI.addOperand(MOp1);
} else {
// Store all operands in a temporary vector, remove them and re-add in the
// right order.
SmallVector<MachineOperand, 2> MOps;
unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
// NOTE(review): this unsigned loop would underflow if MinOp were 0;
// callers appear to only swap use operands (index >= 1) — confirm.
for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
MOps.push_back(MI.getOperand(i));
MI.RemoveOperand(i);
}
// MOp2 needs to be added next.
MI.addOperand(MOp2);
// Now add the rest.
for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
// MOp1 goes where MOp2 originally was; everything else goes back in
// its original slot (MOps holds them in reverse order).
if (i == MaxOp)
MI.addOperand(MOp1);
else {
MI.addOperand(MOps.back());
MOps.pop_back();
}
}
}
}
// Rewrite the reg+reg instruction MI into the reg+imm form described by III,
// substituting the immediate Imm for the register at ConstantOpNo. Returns
// false (leaving MI untouched) when the immediate is out of range or not a
// required multiple, when the constant feeds a non-commutable operand, or
// when post-RA R0/X0 constraints make the transformation unsafe.
bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
unsigned ConstantOpNo,
int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative)
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
// Range-check the immediate against the new instruction's field width.
if (III.SignedImm) {
APInt ActualValue(64, Imm, true);
if (!ActualValue.isSignedIntN(III.ImmWidth))
return false;
} else {
uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
if ((uint64_t)Imm > UnsignedMax)
return false;
}
// If we're post-RA, the instructions don't agree on whether register zero is
// special, we can transform this as long as the register operand that will
// end up in the location where zero is special isn't R0.
if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
III.ZeroIsSpecialNew + 1;
unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
// If R0 is in the operand where zero is special for the new instruction,
// it is unsafe to transform if the constant operand isn't that operand.
if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
ConstantOpNo != III.ZeroIsSpecialNew)
return false;
if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
ConstantOpNo != PosForOrigZero)
return false;
}
// Classify the shift opcodes whose immediate forms need special encoding
// (rlwinm/rldicl-style SH/MB/ME fields rather than a plain shift amount).
unsigned Opc = MI.getOpcode();
bool SpecialShift32 =
Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
bool SpecialShift64 =
Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo;
bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo ||
Opc == PPC::SLDo || Opc == PPC::SRDo;
bool RightShift =
Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
MI.setDesc(get(III.ImmOpcode));
if (ConstantOpNo == III.ConstantOpNo) {
// Converting shifts to immediate form is a bit tricky since they may do
// one of three things:
// 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
// 2. If the shift amount is zero, the result is unchanged (save for maybe
// setting CR0)
// 3. If the shift amount is in [1, OpSize), it's just a shift
if (SpecialShift32 || SpecialShift64) {
LoadImmediateInfo LII;
LII.Imm = 0;
LII.SetCR = SetCR;
LII.Is64Bit = SpecialShift64;
uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
// Case 1: shift amount >= OpSize — the result is a constant zero.
if (Imm & (SpecialShift32 ? 0x20 : 0x40))
replaceInstrWithLI(MI, LII);
// Shifts by zero don't change the value. If we don't need to set CR0,
// just convert this to a COPY. Can't do this post-RA since we've already
// cleaned up the copies.
else if (!SetCR && ShAmt == 0 && !PostRA) {
MI.RemoveOperand(2);
MI.setDesc(get(PPC::COPY));
} else {
// The 32 bit and 64 bit instructions are quite different.
if (SpecialShift32) {
// Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31).
uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
// Left shifts use (N, 63-N), right shifts use (64-N, N).
uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
}
}
} else
MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
}
// Convert commutative instructions (switch the operands and convert the
// desired one to an immediate.
else if (III.IsCommutative) {
MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo);
} else
llvm_unreachable("Should have exited early!");
// For instructions for which the constant register replaces a different
// operand than where the immediate goes, we need to swap them.
if (III.ConstantOpNo != III.ImmOpNo)
swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo);
// If the R0/X0 register is special for the original instruction and not for
// the new instruction (or vice versa), we need to fix up the register class.
if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
if (!III.ZeroIsSpecialOrig) {
// Constrain the operand that became zero-special to a NOR0/NOX0 class
// so the register allocator never assigns R0/X0 there.
unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
const TargetRegisterClass *NewRC =
MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
&PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
MRI.setRegClass(RegToModify, NewRC);
}
}
return true;
}
const TargetRegisterClass *
PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)

View File

@ -72,6 +72,41 @@ enum {
};
} // end namespace PPCII
// Instructions that have an immediate form might be convertible to that
// form if the correct input is a result of a load immediate. In order to
// know whether the transformation is special, we might need to know some
// of the details of the two forms.
struct ImmInstrInfo {
// Is the immediate field in the immediate form signed or unsigned?
uint64_t SignedImm : 1;
// Does the immediate need to be a multiple of some value?
uint64_t ImmMustBeMultipleOf : 5;
// Is R0/X0 treated specially by the original r+r instruction?
// If so, in which operand? (0 means "not special".)
uint64_t ZeroIsSpecialOrig : 3;
// Is R0/X0 treated specially by the new r+i instruction?
// If so, in which operand? (0 means "not special".)
uint64_t ZeroIsSpecialNew : 3;
// Is the operation commutative?
uint64_t IsCommutative : 1;
// The operand number to check for load immediate.
uint64_t ConstantOpNo : 3;
// The operand number for the immediate.
uint64_t ImmOpNo : 3;
// The opcode of the new instruction.
// NOTE(review): stored in 16 bits — assumes all PPC opcode enum values
// fit in 16 bits; verify if the opcode space ever grows.
uint64_t ImmOpcode : 16;
// The size of the immediate, in bits.
uint64_t ImmWidth : 5;
};
// Information required to convert an instruction to just a materialized
// immediate.
struct LoadImmediateInfo {
// The 16-bit immediate to materialize.
unsigned Imm : 16;
// Use the 64-bit opcode (LI8/ANDIo8) rather than the 32-bit one (LI/ANDIo).
unsigned Is64Bit : 1;
// Materialize with a record-form ANDI so CR0 is set, instead of a plain LI.
unsigned SetCR : 1;
};
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
@ -87,6 +122,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs,
bool &NonRI, bool &SpillsVRS) const;
bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
unsigned ConstantOpNo, int64_t Imm) const;
MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp,
bool &SeenIntermediateUse) const;
virtual void anchor();
protected:
@ -313,6 +352,19 @@ public:
bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const {
return isSignOrZeroExtended(MI, false, depth);
}
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
// This is used to find the "true" source register for n
// Machine instruction. Returns the original SrcReg unless it is the target
// of a copy-like operation, in which case we chain backwards through all
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search.
static unsigned lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI);
bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
};
}

View File

@ -1590,6 +1590,11 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
(ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<

View File

@ -41,6 +41,22 @@ STATISTIC(MultiTOCSaves,
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
"Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
"Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
"Number of fixed-point iterations converting reg-reg instructions "
"to reg-imm ones");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
cl::desc("Iterate to a fixed point when attempting to "
"convert reg-reg instructions to reg-imm"));
static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
cl::desc("Convert eligible reg+reg instructions to reg+imm"));
static cl::opt<bool>
EnableSExtElimination("ppc-eliminate-signext",
@ -52,10 +68,6 @@ static cl::opt<bool>
cl::desc("enable elimination of zero-extensions"),
cl::init(false), cl::Hidden);
namespace llvm {
void initializePPCMIPeepholePass(PassRegistry&);
}
namespace {
struct PPCMIPeephole : public MachineFunctionPass {
@ -83,9 +95,6 @@ private:
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
MachineInstr *MI);
// Find the "true" register represented by SrcReg (following chains
// of copies and subreg_to_reg operations).
unsigned lookThruCopyLike(unsigned SrcReg);
public:
@ -212,6 +221,35 @@ bool PPCMIPeephole::simplifyCode(void) {
MachineInstr* ToErase = nullptr;
std::map<MachineInstr *, bool> TOCSaves;
NumFunctionsEnteredInMIPeephole++;
if (ConvertRegReg) {
// Fixed-point conversion of reg/reg instructions fed by load-immediate
// into reg/imm instructions. FIXME: This is expensive, control it with
// an option.
bool SomethingChanged = false;
do {
NumFixedPointIterations++;
SomethingChanged = false;
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
if (MI.isDebugValue())
continue;
if (TII->convertToImmediateForm(MI)) {
// We don't erase anything in case the def has other uses. Let DCE
// remove it if it can be removed.
DEBUG(dbgs() << "Converted instruction to imm form: ");
DEBUG(MI.dump());
NumConvertedToImmediateForm++;
SomethingChanged = true;
Simplified = true;
continue;
}
}
}
} while (SomethingChanged && FixedPointRegToImm);
}
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
@ -258,8 +296,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
// We have to look through chains of COPY and SUBREG_TO_REG
// to find the real source values for comparison.
unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
unsigned TrueReg1 =
TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
unsigned TrueReg2 =
TII->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
if (TrueReg1 == TrueReg2
&& TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
@ -273,7 +313,8 @@ bool PPCMIPeephole::simplifyCode(void) {
auto isConversionOfLoadAndSplat = [=]() -> bool {
if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
return false;
unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
unsigned DefReg =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
@ -299,10 +340,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// can replace it with a copy.
if (DefOpc == PPC::XXPERMDI) {
unsigned FeedImmed = DefMI->getOperand(3).getImm();
unsigned FeedReg1
= lookThruCopyLike(DefMI->getOperand(1).getReg());
unsigned FeedReg2
= lookThruCopyLike(DefMI->getOperand(2).getReg());
unsigned FeedReg1 =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned FeedReg2 =
TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
DEBUG(dbgs()
@ -360,7 +401,8 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::XXSPLTW: {
unsigned MyOpcode = MI.getOpcode();
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
unsigned TrueReg =
TII->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@ -422,7 +464,8 @@ bool PPCMIPeephole::simplifyCode(void) {
}
case PPC::XVCVDPSP: {
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
unsigned TrueReg =
TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@ -430,8 +473,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// This can occur when building a vector of single precision or integer
// values.
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
unsigned DefsReg1 =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned DefsReg2 =
TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
!TargetRegisterInfo::isVirtualRegister(DefsReg2))
break;
@ -1221,36 +1266,6 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
return Simplified;
}
// This is used to find the "true" source register for an
// XXPERMDI instruction, since MachineCSE does not handle the
// "copy-like" operations (Copy and SubregToReg). Returns
// the original SrcReg unless it is the target of a copy-like
// operation, in which case we chain backwards through all
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search.
unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
while (true) {
MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
unsigned CopySrcReg;
if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
else {
assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
}
}
} // end default namespace
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,

View File

@ -0,0 +1,95 @@
//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// A pre-emit peephole for catching opportunities introduced by late passes such
// as MachineBlockPlacement.
//
//===----------------------------------------------------------------------===//
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-pre-emit-peephole"
STATISTIC(NumRRConvertedInPreEmit,
"Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {
// A very late (pre-emission) peephole pass. Its sole job here is to re-run
// PPCInstrInfo::convertToImmediateForm over every instruction, catching
// reg+reg -> reg+imm opportunities created after the MachineSSA-time
// PPCMIPeephole run (e.g. by tail duplication or block placement).
class PPCPreEmitPeephole : public MachineFunctionPass {
public:
// Pass identification, replacement for typeid.
static char ID;
PPCPreEmitPeephole() : MachineFunctionPass(ID) {
// Register the pass so it is available to opt/llc by name (needed for
// MIR tests that run this pass in isolation).
initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
// No extra analyses required; defer to the base class.
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionProperties getRequiredProperties() const override {
// Runs after register allocation: only physical registers may appear.
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
// Returns true if any instruction was converted or deleted.
bool runOnMachineFunction(MachineFunction &MF) override {
// Respect -O0/optnone as well as the -ppc-late-peephole off switch.
if (skipFunction(*MF.getFunction()) || !RunPreEmitPeephole)
return false;
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
// If the conversion consumed the last use of a load-immediate, the
// callee reports that def here so we can delete it afterwards.
MachineInstr *DefMIToErase = nullptr;
if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
Changed = true;
NumRRConvertedInPreEmit++;
DEBUG(dbgs() << "Converted instruction to imm form: ");
DEBUG(MI.dump());
if (DefMIToErase) {
InstrsToErase.push_back(DefMIToErase);
}
}
}
}
// Erasure is deferred until after the walk so the block iterators above
// are never invalidated mid-iteration.
for (MachineInstr *MI : InstrsToErase) {
DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
DEBUG(MI->dump());
MI->eraseFromParent();
NumRemovedInPreEmit++;
}
return Changed;
}
};
}
// Register the pass with the legacy pass manager under the
// "ppc-pre-emit-peephole" name (DEBUG_TYPE); not a CFG-only or analysis pass.
INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
false, false)
char PPCPreEmitPeephole::ID = 0;
// Factory used by PPCTargetMachine when building the codegen pipeline.
FunctionPass *llvm::createPPCPreEmitPeepholePass() {
return new PPCPreEmitPeephole();
}

View File

@ -101,7 +101,9 @@ extern "C" void LLVMInitializePowerPCTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
initializePPCTLSDynamicCallPass(PR);
initializePPCMIPeepholePass(PR);
}
/// Return the datalayout string of a subtarget.
@ -440,6 +442,7 @@ void PPCPassConfig::addPreSched2() {
}
void PPCPassConfig::addPreEmitPass() {
addPass(createPPCPreEmitPeepholePass());
addPass(createPPCExpandISELPass());
if (getOptLevel() != CodeGenOpt::None)

View File

@ -3508,13 +3508,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3546,13 +3546,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3591,13 +3591,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3636,13 +3636,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3693,11 +3693,11 @@ entry:
; P9LE-NEXT: xscvdpsxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfs
; P8BE-NEXT: xscvdpsxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfs
; P8LE-NEXT: xscvdpsxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr
@ -4412,13 +4412,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4450,13 +4450,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4495,13 +4495,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4540,13 +4540,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4597,11 +4597,11 @@ entry:
; P9LE-NEXT: xscvdpuxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfs
; P8BE-NEXT: xscvdpuxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfs
; P8LE-NEXT: xscvdpuxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr

View File

@ -0,0 +1,436 @@
# RUN: llc -start-after ppc-mi-peepholes -ppc-late-peephole %s -o - | FileCheck %s
--- |
; ModuleID = 'a.ll'
source_filename = "a.c"
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readnone
define signext i32 @unsafeAddR0R3(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @unsafeAddR3R0(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @safeAddR0R3(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @safeAddR3R0(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readonly
define i64 @unsafeLDXR3R0(i64* nocapture readonly %ptr, i64 %off) local_unnamed_addr #1 {
entry:
%0 = bitcast i64* %ptr to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 %off
%1 = bitcast i8* %add.ptr to i64*
%2 = load i64, i64* %1, align 8, !tbaa !3
ret i64 %2
}
; Function Attrs: norecurse nounwind readonly
define i64 @safeLDXZeroR3(i64* nocapture readonly %ptr, i64 %off) local_unnamed_addr #1 {
entry:
%0 = bitcast i64* %ptr to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 %off
%1 = bitcast i8* %add.ptr to i64*
%2 = load i64, i64* %1, align 8, !tbaa !3
ret i64 %2
}
; Function Attrs: norecurse nounwind readonly
define i64 @safeLDXR3R0(i64* nocapture readonly %ptr, i64 %off) local_unnamed_addr #1 {
entry:
%0 = bitcast i64* %ptr to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 %off
%1 = bitcast i8* %add.ptr to i64*
%2 = load i64, i64* %1, align 8, !tbaa !3
ret i64 %2
}
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{!"clang version 6.0.0 (trunk 318832)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"long long", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
...
---
name: unsafeAddR0R3
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x0, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x0
%2:gprc = LI 44
%3:gprc = COPY %1.sub_32
%4:gprc = ADD4 killed %r0, killed %2
; CHECK: li 3, 44
; CHECK: add 3, 0, 3
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: unsafeAddR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x0, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x0
%2:gprc = COPY %0.sub_32
%3:gprc = LI 44
%4:gprc = ADD4 killed %3, killed %r0
; CHECK: li 3, 44
; CHECK: add 3, 3, 0
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeAddR0R3
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x3
%2:gprc = COPY %0.sub_32
%r0 = LI 44
%4:gprc = ADD4 killed %r0, killed %2
; CHECK: addi 3, 3, 44
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeAddR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x3
%2:gprc = COPY %0.sub_32
%r0 = LI 44
%4:gprc = ADD4 killed %2, killed %r0
; CHECK: addi 3, 3, 44
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: unsafeLDXR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x0', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x0, %x4
%1:g8rc = COPY %x4
%0:g8rc_and_g8rc_nox0 = LI8 44
%2:g8rc = LDX %0, %x0 :: (load 8 from %ir.1, !tbaa !3)
; CHECK: li 3, 44
; CHECK: ldx 3, 3, 0
%x3 = COPY %2
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeLDXZeroR3
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%1:g8rc = LI8 44
%0:g8rc_and_g8rc_nox0 = LI8 44
%2:g8rc = LDX %zero8, %1 :: (load 8 from %ir.1, !tbaa !3)
; CHECK: ld 3, 44(0)
%x3 = COPY %2
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeLDXR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%x0 = LI8 44
%0:g8rc_and_g8rc_nox0 = COPY %x3
%2:g8rc = LDX %0, %x0 :: (load 8 from %ir.1, !tbaa !3)
; CHECK: ld 3, 44(3)
%x3 = COPY %2
BLR8 implicit %lr8, implicit %rm, implicit %x3
...

File diff suppressed because it is too large Load Diff

View File

@ -37,9 +37,13 @@ define void @foo(i8 %a, i16 %b) nounwind {
;; A few test to check materialization
%5 = call i32 @t2(i8 zeroext 255)
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; ELF64: li 3, 255
; ELF64-NOT: clrldi
%6 = call i32 @t4(i16 zeroext 65535)
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; ELF64: lis 3, 0
; ELF64: ori 3, 3, 65535
; ELF64: clrldi 3, 3, 48
; ELF64: bl t4
ret void
}
@ -66,12 +70,8 @@ entry:
; ELF64: li 6, 28
; ELF64: li 7, 40
; ELF64: li 8, 186
; ELF64: clrldi 3, 3, 56
; ELF64: clrldi 4, 4, 56
; ELF64: clrldi 5, 5, 56
; ELF64: clrldi 6, 6, 56
; ELF64: clrldi 7, 7, 56
; ELF64: clrldi 8, 8, 56
; ELF64-NOT: clrldi
; ELF64: bl bar
ret i32 0
}

View File

@ -418,9 +418,9 @@ define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
; CHECK-LABEL: ne_neg1_and_ne_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: addi 3, 3, 1
; CHECK-NEXT: subfc 3, 3, 4
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: subfic 3, 3, 1
; CHECK-NEXT: subfe 3, 4, 4
; CHECK-NEXT: neg 3, 3
; CHECK-NEXT: blr

View File

@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 \
; RUN: -ppc-convert-rr-to-ri -verify-machineinstrs | FileCheck %s
define void @test(i32 zeroext %parts) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %cond.end.i
; CHECK-NEXT: cmplwi 0, 3, 1
; CHECK-NEXT: bnelr+ 0
; CHECK-NEXT: # %bb.1: # %test2.exit.us.unr-lcssa
; CHECK-NEXT: ld 3, 0(3)
; CHECK-NEXT: std 3, 0(3)
entry:
br label %cond.end.i
cond.end.i: ; preds = %entry
%cmp18.i = icmp eq i32 %parts, 1
br i1 %cmp18.i, label %while.body.lr.ph.i.us.preheader, label %test3.exit.split
while.body.lr.ph.i.us.preheader: ; preds = %cond.end.i
%0 = icmp eq i32 %parts, 1
br label %for.body.i62.us.preheader
for.body.i62.us.preheader: ; preds = %while.body.lr.ph.i.us.preheader
br i1 %0, label %test2.exit.us.unr-lcssa, label %for.body.i62.us.preheader.new
for.body.i62.us.preheader.new: ; preds = %for.body.i62.us.preheader
br label %for.body.i62.us
for.body.i62.us: ; preds = %if.end.i.us.1, %for.body.i62.us.preheader.new
%niter = phi i64 [ undef, %for.body.i62.us.preheader.new ], [ %niter.nsub.1, %if.end.i.us.1 ]
%cmp8.i.us.1 = icmp uge i64 undef, 0
br label %if.end.i.us.1
test2.exit.us.unr-lcssa: ; preds = %if.end.i.us.1, %for.body.i62.us.preheader
%c.addr.036.i.us.unr = phi i64 [ 0, %for.body.i62.us.preheader ], [ %c.addr.1.i.us.1, %if.end.i.us.1 ]
%1 = load i64, i64* undef, align 8
%tobool.i61.us.epil = icmp eq i64 %c.addr.036.i.us.unr, 0
%add.neg.i.us.epil.pn = select i1 %tobool.i61.us.epil, i64 %1, i64 0
%storemerge269 = sub i64 %add.neg.i.us.epil.pn, 0
store i64 %storemerge269, i64* undef, align 8
unreachable
test3.exit.split: ; preds = %cond.end.i
ret void
if.end.i.us.1: ; preds = %for.body.i62.us
%c.addr.1.i.us.1 = zext i1 %cmp8.i.us.1 to i64
%niter.nsub.1 = add i64 %niter, -2
%niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
br i1 %niter.ncmp.1, label %test2.exit.us.unr-lcssa, label %for.body.i62.us
}

View File

@ -89,7 +89,7 @@ entry:
; CHECK: @foo6
; CHECK-DAG: ld
; CHECK-DAG: ld
; CHECK-DAG: stdx
; CHECK-DAG: std
; CHECK: stdx
; For VSX on P7, unaligned loads and stores are preferable to aligned

View File

@ -70,9 +70,9 @@ entry:
; CHECK-LABEL: @getf
; CHECK-P7-LABEL: @getf
; CHECK-BE-LABEL: @getf
; CHECK: li [[IMMREG:[0-9]+]], 3
; CHECK: xor [[TRUNCREG:[0-9]+]], [[IMMREG]], 5
; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]]
; CHECK: xori [[TRUNCREG:[0-9]+]], 5, 3
; CHECK: sldi [[SHIFTREG:[0-9]+]], [[TRUNCREG]], 2
; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
; CHECK: xscvspdpn 1,
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29