[PowerPC] Convert r+r instructions to r+i (pre and post RA)

This patch adds the necessary infrastructure to convert instructions that
take two register operands to those that take a register and immediate if
the necessary operand is produced by a load-immediate. Furthermore, it uses
this infrastructure to perform such conversions twice - first at MachineSSA
and then pre-emit.

There are a number of reasons we may end up with opportunities for this
transformation, including but not limited to:
- X-Form instructions chosen since the exact offset isn't available at ISEL time
- Atomic instructions with constant operands (we will add patterns for this
  in the future)
- Tail duplication may duplicate code where one block contains this redundancy
- When emitting compare-free code in PPCDAGToDAGISel, we don't handle constant
  comparands specially

Furthermore, this patch moves the initialization of PPCMIPeepholePass so that
it can be used for MIR tests.

llvm-svn: 320791
This commit is contained in:
Nemanja Ivanovic 2017-12-15 07:27:53 +00:00
parent 14d377e2bb
commit b8102d2cc0
17 changed files with 7699 additions and 90 deletions

View File

@ -43,6 +43,7 @@ add_llvm_target(PowerPCCodeGen
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
PPCExpandISEL.cpp
PPCPreEmitPeephole.cpp
)
add_subdirectory(AsmParser)

View File

@ -50,6 +50,7 @@ namespace llvm {
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
@ -59,7 +60,9 @@ namespace llvm {
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);
void initializePPCTLSDynamicCallPass(PassRegistry &);
void initializePPCMIPeepholePass(PassRegistry&);
extern char &PPCVSXFMAMutateID;
namespace PPCII {

View File

@ -194,6 +194,11 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<

View File

@ -51,6 +51,10 @@ STATISTIC(NumStoreSPILLVSRRCAsVec,
STATISTIC(NumStoreSPILLVSRRCAsGpr,
"Number of spillvsrrc spilled to stack as gpr");
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
STATISTIC(CmpIselsConverted,
"Number of ISELs that depend on comparison of constants converted");
STATISTIC(MissedConvertibleImmediateInstrs,
"Number of compare-immediate instructions fed by constants");
static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
@ -2147,6 +2151,816 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
unsigned PPCInstrInfo::lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI) {
while (true) {
MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
unsigned CopySrcReg;
if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
else {
assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
}
}
// Essentially a compile-time implementation of a compare->isel sequence.
// It takes two constants to compare, along with the true/false registers
// and the comparison type (as a subreg to a CR field) and returns one
// of the true/false registers, depending on the comparison results.
// Returns PPC::NoRegister for compare opcodes it does not understand.
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
                          unsigned TrueReg, unsigned FalseReg,
                          unsigned CRSubReg) {
  // Signed compares assume the immediates are already sign-extended;
  // unsigned compares reinterpret the same bits as uint64_t.
  bool IsSigned = CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI;
  bool IsUnsigned = CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI;
  if (!IsSigned && !IsUnsigned)
    return PPC::NoRegister;

  bool Taken;
  switch (CRSubReg) {
  default:
    llvm_unreachable("Unknown integer comparison type.");
  case PPC::sub_lt:
    Taken = IsSigned ? (Imm1 < Imm2) : ((uint64_t)Imm1 < (uint64_t)Imm2);
    break;
  case PPC::sub_gt:
    Taken = IsSigned ? (Imm1 > Imm2) : ((uint64_t)Imm1 > (uint64_t)Imm2);
    break;
  case PPC::sub_eq:
    // Equality is sign-agnostic.
    Taken = Imm1 == Imm2;
    break;
  }
  return Taken ? TrueReg : FalseReg;
}
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
                                      const LoadImmediateInfo &LII) const {
  // An ANDIo/ANDIo8 keeps its register input (operand 1) in addition to the
  // def; a plain LI/LI8 keeps only the def (operand 0). Strip everything
  // after the last operand to keep, highest index first so indices remain
  // valid as we remove.
  const int LastOpToKeep = LII.SetCR ? 1 : 0;
  int Idx = MI.getNumOperands() - 1;
  while (Idx > LastOpToKeep) {
    MI.RemoveOperand(Idx);
    --Idx;
  }

  // Pick the replacement opcode: a record-form AND-immediate when CR0 must
  // be set, otherwise a load-immediate.
  unsigned NewOpc;
  if (LII.SetCR)
    NewOpc = LII.Is64Bit ? PPC::ANDIo8 : PPC::ANDIo;
  else
    NewOpc = LII.Is64Bit ? PPC::LI8 : PPC::LI;
  MI.setDesc(get(NewOpc));

  // Append the immediate operand.
  MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(LII.Imm);
}
// Scan MI's register operands for one whose value is produced by a
// load-immediate (LI/LI8). On success, set ConstOp to that operand's index
// and return the defining LI/LI8; otherwise return nullptr. Pre-RA (SSA),
// definitions are found through MRI (looking through copy-like chains);
// post-RA, only a backwards scan within MI's own basic block is done.
// SeenIntermediateUse is set (in the post-RA path) when another read of the
// register occurs between the LI and MI, i.e. the LI is not trivially dead.
// NOTE(review): SeenIntermediateUse is only assigned on the post-RA path;
// callers should initialize it before calling — confirm against call sites.
MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
unsigned &ConstOp,
bool &SeenIntermediateUse) const {
ConstOp = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
// If we're in SSA, get the defs through the MRI. Otherwise, only look
// within the basic block to see if the register is defined using an LI/LI8.
if (MRI->isSSA()) {
// Operand 0 is the def; only the uses (1..e) can be fed by a constant.
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
if (!MI.getOperand(i).isReg())
continue;
unsigned Reg = MI.getOperand(i).getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Look through COPY/SUBREG_TO_REG chains to the originating vreg.
unsigned TrueReg = lookThruCopyLike(Reg, MRI);
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
ConstOp = i;
break;
}
}
}
} else {
// Looking back through the definition for each operand could be expensive,
// so exit early if this isn't an instruction that either has an immediate
// form or is already an immediate form that we can handle.
ImmInstrInfo III;
unsigned Opc = MI.getOpcode();
bool ConvertibleImmForm =
Opc == PPC::CMPWI || Opc == PPC::CMPLWI ||
Opc == PPC::CMPDI || Opc == PPC::CMPLDI ||
Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
Opc == PPC::ORI || Opc == PPC::ORI8 ||
Opc == PPC::XORI || Opc == PPC::XORI8 ||
Opc == PPC::RLDICL || Opc == PPC::RLDICLo ||
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
if (!instrHasImmForm(MI, III) && !ConvertibleImmForm)
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
if ((Opc == PPC::OR || Opc == PPC::OR8) &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
return nullptr;
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
MachineOperand &MO = MI.getOperand(i);
SeenIntermediateUse = false;
if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
// Walk backwards from the instruction just above MI to the block start.
MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
It++;
unsigned Reg = MI.getOperand(i).getReg();
// Is this register defined by a load-immediate in this block?
for ( ; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo())) {
if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) {
ConstOp = i;
return &*It;
} else
// Any other redefinition kills the search for this operand.
break;
} else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
// we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
}
}
}
}
return ConstOp == ~0U ? nullptr : DefMI;
}
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate, convert it to the immediate form if the
// constant is in range. Returns true if MI was rewritten. If KilledDef is
// non-null and the feeding load-immediate becomes dead as a result,
// *KilledDef is set to it so the caller can erase it.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ConstantOperand = ~0U;
  MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand,
                                         SeenIntermediateUse);
  if (!DefMI || !DefMI->getOperand(1).isImm())
    return false;
  assert(ConstantOperand < MI.getNumOperands() &&
         "The constant operand needs to be valid at this point");

  int64_t Immediate = DefMI->getOperand(1).getImm();
  // Sign-extend the LI's 16-bit immediate to 64-bits.
  int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
                    (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;

  // The load-immediate is only safe to delete if its result is killed here
  // and no other instruction read it between the def and MI.
  if (KilledDef && MI.getOperand(ConstantOperand).isKill() &&
      !SeenIntermediateUse)
    *KilledDef = DefMI;

  // If this is a reg+reg instruction that has a reg+imm form, convert it now.
  ImmInstrInfo III;
  if (instrHasImmForm(MI, III))
    return transformToImmForm(MI, III, ConstantOperand, SExtImm);

  bool ReplaceWithLI = false;
  bool Is64BitLI = false;
  int64_t NewImm = 0;
  bool SetCR = false;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default: return false;

  // FIXME: Any branches conditional on such a comparison can be made
  // unconditional. At this time, this happens too infrequently to be worth
  // the implementation effort, but if that ever changes, we could convert
  // such a pattern here.
  case PPC::CMPWI:
  case PPC::CMPLWI:
  case PPC::CMPDI:
  case PPC::CMPLDI: {
    // Doing this post-RA would require dataflow analysis to reliably find
    // uses of the CR register set by the compare.
    if (PostRA)
      return false;
    // If a compare-immediate is fed by an immediate and is itself an input
    // of an ISEL (the most common case), convert the ISEL into a COPY of the
    // correct register (the comparison can be folded at compile time).
    bool Changed = false;
    unsigned DefReg = MI.getOperand(0).getReg();
    int64_t Comparand = MI.getOperand(2).getImm();
    int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ?
                            (Comparand | 0xFFFFFFFFFFFF0000) : Comparand;
    for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
      unsigned UseOpc = CompareUseMI.getOpcode();
      if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
        continue;
      unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
      unsigned TrueReg = CompareUseMI.getOperand(1).getReg();
      unsigned FalseReg = CompareUseMI.getOperand(2).getReg();
      unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg,
                                     FalseReg, CRSubReg);
      if (RegToCopy == PPC::NoRegister)
        continue;
      // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
      if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
        CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
        CompareUseMI.getOperand(1).ChangeToImmediate(0);
        CompareUseMI.RemoveOperand(3);
        CompareUseMI.RemoveOperand(2);
        continue;
      }
      DEBUG(dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
      DEBUG(DefMI->dump(); MI.dump(); CompareUseMI.dump());
      DEBUG(dbgs() << "Is converted to:\n");
      // Convert to copy and remove unneeded operands.
      CompareUseMI.setDesc(get(PPC::COPY));
      CompareUseMI.RemoveOperand(3);
      CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
      CmpIselsConverted++;
      Changed = true;
      DEBUG(CompareUseMI.dump());
    }
    if (Changed)
      return true;
    // This may end up incremented multiple times since this function is
    // called during a fixed-point transformation, but it is only meant to
    // indicate the presence of this opportunity.
    MissedConvertibleImmediateInstrs++;
    return false;
  }

  // Immediate forms - may simply be convertable to an LI.
  case PPC::ADDI:
  case PPC::ADDI8: {
    // Does the sum fit in a 16-bit signed field?
    int64_t Addend = MI.getOperand(2).getImm();
    if (isInt<16>(Addend + SExtImm)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ADDI8;
      NewImm = Addend + SExtImm;
      break;
    }
    // Bug fix: this case previously fell through into the RLDICL handling
    // below, which reads operand 3 — an operand an ADDI does not have.
    return false;
  }
  case PPC::RLDICL:
  case PPC::RLDICLo:
  case PPC::RLDICL_32:
  case PPC::RLDICL_32_64: {
    // Use APInt's rotate function.
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    // RLDICLo is also a 64-bit rotate; only the _32 variants rotate a 32-bit
    // value (previously RLDICLo was incorrectly treated as 32-bit).
    APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ? 64 : 32,
                SExtImm, true);
    InVal = InVal.rotl(SH);
    // MB == 0 keeps all 64 bits; special-case it since a 64-bit shift by 64
    // is undefined behavior. Use 1ULL so the shift is 64-bit on all hosts.
    uint64_t Mask = MB == 0 ? -1ULL : (1ULL << (63 - MB + 1)) - 1;
    InVal &= Mask;
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDIo which won't sign extend, so that's safe.
    if (isUInt<15>(InVal.getSExtValue()) ||
        (Opc == PPC::RLDICLo && isUInt<16>(InVal.getSExtValue()))) {
      ReplaceWithLI = true;
      Is64BitLI = Opc != PPC::RLDICL_32;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLDICLo;
      break;
    }
    return false;
  }
  case PPC::RLWINM:
  case PPC::RLWINM8:
  case PPC::RLWINMo:
  case PPC::RLWINM8o: {
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    int64_t ME = MI.getOperand(4).getImm();
    // A wrap-around mask (MB > ME) cannot be expressed by the contiguous
    // mask computed below; the old code silently computed a wrong (empty)
    // mask for such inputs, so bail out instead.
    if (MB > ME)
      return false;
    APInt InVal(32, SExtImm, true);
    InVal = InVal.rotl(SH);
    // Set the bits ( MB + 32 ) to ( ME + 32 ). Use 1ULL so the MB == 0 case
    // (a shift by 32) is well-defined.
    uint64_t Mask = ((1ULL << (32 - MB)) - 1) & ~((1ULL << (31 - ME)) - 1);
    InVal &= Mask;
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDIo which won't sign extend, so that's safe.
    bool ValueFits = isUInt<15>(InVal.getSExtValue());
    ValueFits |= ((Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o) &&
                  isUInt<16>(InVal.getSExtValue()));
    if (ValueFits) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o;
      break;
    }
    return false;
  }
  case PPC::ORI:
  case PPC::ORI8:
  case PPC::XORI:
  case PPC::XORI8: {
    int64_t LogicalImm = MI.getOperand(2).getImm();
    int64_t Result = 0;
    if (Opc == PPC::ORI || Opc == PPC::ORI8)
      Result = LogicalImm | SExtImm;
    else
      Result = LogicalImm ^ SExtImm;
    if (isInt<16>(Result)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
      NewImm = Result;
      break;
    }
    return false;
  }
  }

  if (ReplaceWithLI) {
    DEBUG(dbgs() << "Replacing instruction:\n");
    DEBUG(MI.dump());
    DEBUG(dbgs() << "Fed by:\n");
    DEBUG(DefMI->dump());
    LoadImmediateInfo LII;
    LII.Imm = NewImm;
    LII.Is64Bit = Is64BitLI;
    LII.SetCR = SetCR;
    // If we're setting the CR, the original load-immediate must be kept (as
    // an operand to ANDIo/ANDI8o).
    if (KilledDef && SetCR)
      *KilledDef = nullptr;
    replaceInstrWithLI(MI, LII);
    DEBUG(dbgs() << "With:\n");
    DEBUG(MI.dump());
    return true;
  }
  return false;
}
// Determine whether the reg+reg instruction MI has an equivalent reg+imm
// form. If so, fill III with everything needed to convert it — the new
// opcode, which operand is replaced by the immediate, signedness/width/
// alignment constraints on the immediate, whether the operands may be
// commuted, and where R0/X0 reads as zero — and return true. Returns false
// if no immediate form exists (or a required subtarget feature is missing).
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
ImmInstrInfo &III) const {
unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. A marked exception
// are the update form loads/stores for which a constant operand 2 would need
// to turn into a displacement and move operand 1 to the operand 2 position.
III.ImmOpNo = 2;
III.ConstantOpNo = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
switch (Opc) {
default: return false;
case PPC::ADD4:
case PPC::ADD8:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
// ADDI reads operand 1 as zero when it is R0/X0.
III.ZeroIsSpecialNew = 1;
III.IsCommutative = true;
III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
break;
case PPC::ADDC:
case PPC::ADDC8:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
case PPC::ADDCo:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
III.ImmOpcode = PPC::ADDICo;
break;
case PPC::SUBFC:
case PPC::SUBFC8:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
break;
case PPC::CMPW:
case PPC::CMPD:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
break;
case PPC::CMPLW:
case PPC::CMPLD:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
break;
// Logical operations: unsigned 16-bit immediates, fully commutative.
case PPC::ANDo:
case PPC::AND8o:
case PPC::OR:
case PPC::OR8:
case PPC::XOR:
case PPC::XOR8:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::ANDo: III.ImmOpcode = PPC::ANDIo; break;
case PPC::AND8o: III.ImmOpcode = PPC::ANDIo8; break;
case PPC::OR: III.ImmOpcode = PPC::ORI; break;
case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
}
break;
// Rotates and shifts: the variable amount becomes an immediate rotate/shift.
case PPC::RLWNM:
case PPC::RLWNM8:
case PPC::RLWNMo:
case PPC::RLWNM8o:
case PPC::RLDCL:
case PPC::RLDCLo:
case PPC::RLDCR:
case PPC::RLDCRo:
case PPC::SLW:
case PPC::SLW8:
case PPC::SLWo:
case PPC::SLW8o:
case PPC::SRW:
case PPC::SRW8:
case PPC::SRWo:
case PPC::SRW8o:
case PPC::SRAW:
case PPC::SRAWo:
case PPC::SLD:
case PPC::SLDo:
case PPC::SRD:
case PPC::SRDo:
case PPC::SRAD:
case PPC::SRADo:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
// This isn't actually true, but the instructions ignore any of the
// upper bits, so any immediate loaded with an LI is acceptable.
III.ImmWidth = 16;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRAW: III.ImmOpcode = PPC::SRAWI; break;
case PPC::SRAWo: III.ImmOpcode = PPC::SRAWIo; break;
case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::SRAD: III.ImmOpcode = PPC::SRADI; break;
case PPC::SRADo: III.ImmOpcode = PPC::SRADIo; break;
}
break;
// Loads and stores:
// X-form -> D-form: the index register (operand 2) becomes a displacement,
// and R0/X0-reads-as-zero moves from the base (operand 1) to the D-form
// base (operand 2). DS-form (LWA/LD/STD) displacements must be 4-aligned.
case PPC::LBZX:
case PPC::LBZX8:
case PPC::LHZX:
case PPC::LHZX8:
case PPC::LHAX:
case PPC::LHAX8:
case PPC::LWZX:
case PPC::LWZX8:
case PPC::LWAX:
case PPC::LDX:
case PPC::LFSX:
case PPC::LFDX:
case PPC::STBX:
case PPC::STBX8:
case PPC::STHX:
case PPC::STHX8:
case PPC::STWX:
case PPC::STWX8:
case PPC::STDX:
case PPC::STFSX:
case PPC::STFDX:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
III.ImmOpNo = 1;
III.ConstantOpNo = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
case PPC::LWAX:
III.ImmOpcode = PPC::LWA;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
case PPC::STBX: III.ImmOpcode = PPC::STB; break;
case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
case PPC::STHX: III.ImmOpcode = PPC::STH; break;
case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
case PPC::STWX: III.ImmOpcode = PPC::STW; break;
case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
case PPC::STDX:
III.ImmOpcode = PPC::STD;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
}
break;
// Update-form loads/stores: the tied base update shifts every operand by
// one, so the displacement lands in operand 2 and the index in operand 3.
case PPC::LBZUX:
case PPC::LBZUX8:
case PPC::LHZUX:
case PPC::LHZUX8:
case PPC::LHAUX:
case PPC::LHAUX8:
case PPC::LWZUX:
case PPC::LWZUX8:
case PPC::LDUX:
case PPC::LFSUX:
case PPC::LFDUX:
case PPC::STBUX:
case PPC::STBUX8:
case PPC::STHUX:
case PPC::STHUX8:
case PPC::STWUX:
case PPC::STWUX8:
case PPC::STDUX:
case PPC::STFSUX:
case PPC::STFDUX:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 2;
III.ZeroIsSpecialNew = 3;
III.IsCommutative = false;
III.ImmOpNo = 2;
III.ConstantOpNo = 3;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
case PPC::LDUX:
III.ImmOpcode = PPC::LDU;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
case PPC::STDUX:
III.ImmOpcode = PPC::STDU;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
}
break;
// Power9 only.
case PPC::LXVX:
case PPC::LXSSPX:
case PPC::LXSDX:
case PPC::STXVX:
case PPC::STXSSPX:
case PPC::STXSDX:
if (!Subtarget.hasP9Vector())
return false;
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
III.ImmOpNo = 1;
III.ConstantOpNo = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LXVX:
III.ImmOpcode = PPC::LXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::LXSSPX:
III.ImmOpcode = PPC::LXSSP;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::LXSDX:
III.ImmOpcode = PPC::LXSD;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STXVX:
III.ImmOpcode = PPC::STXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::STXSSPX:
III.ImmOpcode = PPC::STXSSP;
III.ImmMustBeMultipleOf = 4;
break;
case PPC::STXSDX:
III.ImmOpcode = PPC::STXSD;
III.ImmMustBeMultipleOf = 4;
break;
}
break;
}
return true;
}
// Utility function for swapping two arbitrary operands of an instruction.
// Both operands are removed (highest index first so the lower index stays
// valid) and re-added in swapped positions; any operands between them are
// buffered and re-added unchanged.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
assert(Op1 != Op2 && "Cannot swap operand with itself.");
unsigned MaxOp = std::max(Op1, Op2);
unsigned MinOp = std::min(Op1, Op2);
MachineOperand MOp1 = MI.getOperand(MinOp);
MachineOperand MOp2 = MI.getOperand(MaxOp);
MI.RemoveOperand(std::max(Op1, Op2));
MI.RemoveOperand(std::min(Op1, Op2));
// If the operands we are swapping are the two at the end (the common case)
// we can just remove both and add them in the opposite order.
if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
MI.addOperand(MOp2);
MI.addOperand(MOp1);
} else {
// Store all operands in a temporary vector, remove them and re-add in the
// right order.
SmallVector<MachineOperand, 2> MOps;
unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
// NOTE(review): this unsigned loop would underflow if MinOp were 0;
// callers appear to only swap use operands (index >= 1) — confirm.
for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
MOps.push_back(MI.getOperand(i));
MI.RemoveOperand(i);
}
// MOp2 needs to be added next.
MI.addOperand(MOp2);
// Now add the rest.
for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
// MOp1 goes where MOp2 originally was; everything else goes back in
// its original slot (MOps holds them in reverse order).
if (i == MaxOp)
MI.addOperand(MOp1);
else {
MI.addOperand(MOps.back());
MOps.pop_back();
}
}
}
}
// Rewrite the reg+reg instruction MI into the reg+imm form described by III,
// substituting the immediate Imm for the register at ConstantOpNo. Returns
// false (leaving MI untouched) when the immediate is out of range or not a
// required multiple, when the constant feeds a non-commutable operand, or
// when post-RA R0/X0 constraints make the transformation unsafe.
bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
unsigned ConstantOpNo,
int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative)
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
// Range-check the immediate against the new instruction's field width.
if (III.SignedImm) {
APInt ActualValue(64, Imm, true);
if (!ActualValue.isSignedIntN(III.ImmWidth))
return false;
} else {
uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
if ((uint64_t)Imm > UnsignedMax)
return false;
}
// If we're post-RA, the instructions don't agree on whether register zero is
// special, we can transform this as long as the register operand that will
// end up in the location where zero is special isn't R0.
if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
III.ZeroIsSpecialNew + 1;
unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
// If R0 is in the operand where zero is special for the new instruction,
// it is unsafe to transform if the constant operand isn't that operand.
if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
ConstantOpNo != III.ZeroIsSpecialNew)
return false;
if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
ConstantOpNo != PosForOrigZero)
return false;
}
// Classify the shift opcodes whose immediate forms need special encoding
// (rlwinm/rldicl-style SH/MB/ME fields rather than a plain shift amount).
unsigned Opc = MI.getOpcode();
bool SpecialShift32 =
Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
bool SpecialShift64 =
Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo;
bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo ||
Opc == PPC::SLDo || Opc == PPC::SRDo;
bool RightShift =
Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
MI.setDesc(get(III.ImmOpcode));
if (ConstantOpNo == III.ConstantOpNo) {
// Converting shifts to immediate form is a bit tricky since they may do
// one of three things:
// 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
// 2. If the shift amount is zero, the result is unchanged (save for maybe
// setting CR0)
// 3. If the shift amount is in [1, OpSize), it's just a shift
if (SpecialShift32 || SpecialShift64) {
LoadImmediateInfo LII;
LII.Imm = 0;
LII.SetCR = SetCR;
LII.Is64Bit = SpecialShift64;
uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
// Case 1: shift amount >= OpSize — the result is a constant zero.
if (Imm & (SpecialShift32 ? 0x20 : 0x40))
replaceInstrWithLI(MI, LII);
// Shifts by zero don't change the value. If we don't need to set CR0,
// just convert this to a COPY. Can't do this post-RA since we've already
// cleaned up the copies.
else if (!SetCR && ShAmt == 0 && !PostRA) {
MI.RemoveOperand(2);
MI.setDesc(get(PPC::COPY));
} else {
// The 32 bit and 64 bit instructions are quite different.
if (SpecialShift32) {
// Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31).
uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
// Left shifts use (N, 63-N), right shifts use (64-N, N).
uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
}
}
} else
MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
}
// Convert commutative instructions (switch the operands and convert the
// desired one to an immediate.
else if (III.IsCommutative) {
MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo);
} else
llvm_unreachable("Should have exited early!");
// For instructions for which the constant register replaces a different
// operand than where the immediate goes, we need to swap them.
if (III.ConstantOpNo != III.ImmOpNo)
swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo);
// If the R0/X0 register is special for the original instruction and not for
// the new instruction (or vice versa), we need to fix up the register class.
if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
if (!III.ZeroIsSpecialOrig) {
// Constrain the operand that became zero-special to a NOR0/NOX0 class
// so the register allocator never assigns R0/X0 there.
unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
const TargetRegisterClass *NewRC =
MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
&PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
MRI.setRegClass(RegToModify, NewRC);
}
}
return true;
}
const TargetRegisterClass *
PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)

View File

@ -72,6 +72,41 @@ enum {
};
} // end namespace PPCII
// Instructions that have an immediate form might be convertible to that
// form if the correct input is a result of a load immediate. In order to
// know whether the transformation is special, we might need to know some
// of the details of the two forms.
struct ImmInstrInfo {
// Is the immediate field in the immediate form signed or unsigned?
uint64_t SignedImm : 1;
// Does the immediate need to be a multiple of some value?
uint64_t ImmMustBeMultipleOf : 5;
// Is R0/X0 treated specially by the original r+r instruction?
// If so, in which operand? (0 means "not special".)
uint64_t ZeroIsSpecialOrig : 3;
// Is R0/X0 treated specially by the new r+i instruction?
// If so, in which operand? (0 means "not special".)
uint64_t ZeroIsSpecialNew : 3;
// Is the operation commutative?
uint64_t IsCommutative : 1;
// The operand number to check for load immediate.
uint64_t ConstantOpNo : 3;
// The operand number for the immediate.
uint64_t ImmOpNo : 3;
// The opcode of the new instruction.
// NOTE(review): stored in 16 bits — assumes all PPC opcode enum values
// fit in 16 bits; verify if the opcode space ever grows.
uint64_t ImmOpcode : 16;
// The size of the immediate, in bits.
uint64_t ImmWidth : 5;
};
// Information required to convert an instruction to just a materialized
// immediate.
struct LoadImmediateInfo {
// The 16-bit immediate to materialize.
unsigned Imm : 16;
// Use the 64-bit opcode (LI8/ANDIo8) rather than the 32-bit one (LI/ANDIo).
unsigned Is64Bit : 1;
// Materialize with a record-form ANDI so CR0 is set, instead of a plain LI.
unsigned SetCR : 1;
};
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
@ -87,6 +122,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs,
bool &NonRI, bool &SpillsVRS) const;
bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
unsigned ConstantOpNo, int64_t Imm) const;
MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp,
bool &SeenIntermediateUse) const;
virtual void anchor();
protected:
@ -313,6 +352,19 @@ public:
bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const {
return isSignOrZeroExtended(MI, false, depth);
}
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
// This is used to find the "true" source register for n
// Machine instruction. Returns the original SrcReg unless it is the target
// of a copy-like operation, in which case we chain backwards through all
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search.
static unsigned lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI);
bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
};
}

View File

@ -1590,6 +1590,11 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
(ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<

View File

@ -41,6 +41,22 @@ STATISTIC(MultiTOCSaves,
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
"Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
"Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
"Number of fixed-point iterations converting reg-reg instructions "
"to reg-imm ones");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
cl::desc("Iterate to a fixed point when attempting to "
"convert reg-reg instructions to reg-imm"));
static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
cl::desc("Convert eligible reg+reg instructions to reg+imm"));
static cl::opt<bool>
EnableSExtElimination("ppc-eliminate-signext",
@ -52,10 +68,6 @@ static cl::opt<bool>
cl::desc("enable elimination of zero-extensions"),
cl::init(false), cl::Hidden);
namespace llvm {
void initializePPCMIPeepholePass(PassRegistry&);
}
namespace {
struct PPCMIPeephole : public MachineFunctionPass {
@ -83,9 +95,6 @@ private:
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
MachineInstr *MI);
// Find the "true" register represented by SrcReg (following chains
// of copies and subreg_to_reg operations).
unsigned lookThruCopyLike(unsigned SrcReg);
public:
@ -212,6 +221,35 @@ bool PPCMIPeephole::simplifyCode(void) {
MachineInstr* ToErase = nullptr;
std::map<MachineInstr *, bool> TOCSaves;
NumFunctionsEnteredInMIPeephole++;
if (ConvertRegReg) {
// Fixed-point conversion of reg/reg instructions fed by load-immediate
// into reg/imm instructions. FIXME: This is expensive, control it with
// an option.
bool SomethingChanged = false;
do {
NumFixedPointIterations++;
SomethingChanged = false;
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
if (MI.isDebugValue())
continue;
if (TII->convertToImmediateForm(MI)) {
// We don't erase anything in case the def has other uses. Let DCE
// remove it if it can be removed.
DEBUG(dbgs() << "Converted instruction to imm form: ");
DEBUG(MI.dump());
NumConvertedToImmediateForm++;
SomethingChanged = true;
Simplified = true;
continue;
}
}
}
} while (SomethingChanged && FixedPointRegToImm);
}
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
@ -258,8 +296,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
// We have to look through chains of COPY and SUBREG_TO_REG
// to find the real source values for comparison.
unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
unsigned TrueReg1 =
TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
unsigned TrueReg2 =
TII->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
if (TrueReg1 == TrueReg2
&& TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
@ -273,7 +313,8 @@ bool PPCMIPeephole::simplifyCode(void) {
auto isConversionOfLoadAndSplat = [=]() -> bool {
if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
return false;
unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
unsigned DefReg =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
@ -299,10 +340,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// can replace it with a copy.
if (DefOpc == PPC::XXPERMDI) {
unsigned FeedImmed = DefMI->getOperand(3).getImm();
unsigned FeedReg1
= lookThruCopyLike(DefMI->getOperand(1).getReg());
unsigned FeedReg2
= lookThruCopyLike(DefMI->getOperand(2).getReg());
unsigned FeedReg1 =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned FeedReg2 =
TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
DEBUG(dbgs()
@ -360,7 +401,8 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::XXSPLTW: {
unsigned MyOpcode = MI.getOpcode();
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
unsigned TrueReg =
TII->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@ -422,7 +464,8 @@ bool PPCMIPeephole::simplifyCode(void) {
}
case PPC::XVCVDPSP: {
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
unsigned TrueReg =
TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@ -430,8 +473,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// This can occur when building a vector of single precision or integer
// values.
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
unsigned DefsReg1 =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned DefsReg2 =
TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
!TargetRegisterInfo::isVirtualRegister(DefsReg2))
break;
@ -1221,36 +1266,6 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
return Simplified;
}
// This is used to find the "true" source register for an
// XXPERMDI instruction, since MachineCSE does not handle the
// "copy-like" operations (Copy and SubregToReg). Returns
// the original SrcReg unless it is the target of a copy-like
// operation, in which case we chain backwards through all
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search.
unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
while (true) {
MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
unsigned CopySrcReg;
if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
else {
assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
}
}
} // end default namespace
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,

View File

@ -0,0 +1,95 @@
//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// A pre-emit peephole for catching opportunities introduced by late passes such
// as MachineBlockPlacement.
//
//===----------------------------------------------------------------------===//
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-pre-emit-peephole"
STATISTIC(NumRRConvertedInPreEmit,
"Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {
// A very late (pre-emission) peephole pass. Its sole job here is to re-run
// PPCInstrInfo::convertToImmediateForm over every instruction, catching
// reg+reg -> reg+imm opportunities created after the MachineSSA-time
// PPCMIPeephole run (e.g. by tail duplication or block placement).
class PPCPreEmitPeephole : public MachineFunctionPass {
public:
// Pass identification, replacement for typeid.
static char ID;
PPCPreEmitPeephole() : MachineFunctionPass(ID) {
// Register the pass so it is available to opt/llc by name (needed for
// MIR tests that run this pass in isolation).
initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
// No extra analyses required; defer to the base class.
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionProperties getRequiredProperties() const override {
// Runs after register allocation: only physical registers may appear.
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
// Returns true if any instruction was converted or deleted.
bool runOnMachineFunction(MachineFunction &MF) override {
// Respect -O0/optnone as well as the -ppc-late-peephole off switch.
if (skipFunction(*MF.getFunction()) || !RunPreEmitPeephole)
return false;
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
// If the conversion consumed the last use of a load-immediate, the
// callee reports that def here so we can delete it afterwards.
MachineInstr *DefMIToErase = nullptr;
if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
Changed = true;
NumRRConvertedInPreEmit++;
DEBUG(dbgs() << "Converted instruction to imm form: ");
DEBUG(MI.dump());
if (DefMIToErase) {
InstrsToErase.push_back(DefMIToErase);
}
}
}
}
// Erasure is deferred until after the walk so the block iterators above
// are never invalidated mid-iteration.
for (MachineInstr *MI : InstrsToErase) {
DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
DEBUG(MI->dump());
MI->eraseFromParent();
NumRemovedInPreEmit++;
}
return Changed;
}
};
}
// Register the pass with the legacy pass manager under the
// "ppc-pre-emit-peephole" name (DEBUG_TYPE); not a CFG-only or analysis pass.
INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
false, false)
char PPCPreEmitPeephole::ID = 0;
// Factory used by PPCTargetMachine when building the codegen pipeline.
FunctionPass *llvm::createPPCPreEmitPeepholePass() {
return new PPCPreEmitPeephole();
}

View File

@ -101,7 +101,9 @@ extern "C" void LLVMInitializePowerPCTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
initializePPCTLSDynamicCallPass(PR);
initializePPCMIPeepholePass(PR);
}
/// Return the datalayout string of a subtarget.
@ -440,6 +442,7 @@ void PPCPassConfig::addPreSched2() {
}
void PPCPassConfig::addPreEmitPass() {
addPass(createPPCPreEmitPeepholePass());
addPass(createPPCExpandISELPass());
if (getOptLevel() != CodeGenOpt::None)

View File

@ -3508,13 +3508,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3546,13 +3546,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3591,13 +3591,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3636,13 +3636,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@ -3693,11 +3693,11 @@ entry:
; P9LE-NEXT: xscvdpsxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfs
; P8BE-NEXT: xscvdpsxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfs
; P8LE-NEXT: xscvdpsxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr
@ -4412,13 +4412,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4450,13 +4450,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfsx
; P8BE: lfs
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfsx
; P8LE: lfs
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4495,13 +4495,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4540,13 +4540,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
; P8BE: lfsx
; P8BE: lfs
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
; P8LE: lfsx
; P8LE: lfs
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@ -4597,11 +4597,11 @@ entry:
; P9LE-NEXT: xscvdpuxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
; P8BE: lfsx
; P8BE: lfs
; P8BE-NEXT: xscvdpuxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
; P8LE: lfsx
; P8LE: lfs
; P8LE-NEXT: xscvdpuxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr

View File

@ -0,0 +1,436 @@
# RUN: llc -start-after ppc-mi-peepholes -ppc-late-peephole %s -o - | FileCheck %s
--- |
; ModuleID = 'a.ll'
source_filename = "a.c"
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readnone
define signext i32 @unsafeAddR0R3(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @unsafeAddR3R0(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @safeAddR0R3(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @safeAddR3R0(i32 signext %a, i32 signext %b) local_unnamed_addr #0 {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
; Function Attrs: norecurse nounwind readonly
define i64 @unsafeLDXR3R0(i64* nocapture readonly %ptr, i64 %off) local_unnamed_addr #1 {
entry:
%0 = bitcast i64* %ptr to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 %off
%1 = bitcast i8* %add.ptr to i64*
%2 = load i64, i64* %1, align 8, !tbaa !3
ret i64 %2
}
; Function Attrs: norecurse nounwind readonly
define i64 @safeLDXZeroR3(i64* nocapture readonly %ptr, i64 %off) local_unnamed_addr #1 {
entry:
%0 = bitcast i64* %ptr to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 %off
%1 = bitcast i8* %add.ptr to i64*
%2 = load i64, i64* %1, align 8, !tbaa !3
ret i64 %2
}
; Function Attrs: norecurse nounwind readonly
define i64 @safeLDXR3R0(i64* nocapture readonly %ptr, i64 %off) local_unnamed_addr #1 {
entry:
%0 = bitcast i64* %ptr to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 %off
%1 = bitcast i8* %add.ptr to i64*
%2 = load i64, i64* %1, align 8, !tbaa !3
ret i64 %2
}
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{!"clang version 6.0.0 (trunk 318832)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"long long", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
...
---
name: unsafeAddR0R3
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x0, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x0
%2:gprc = LI 44
%3:gprc = COPY %1.sub_32
%4:gprc = ADD4 killed %r0, killed %2
; CHECK: li 3, 44
; CHECK: add 3, 0, 3
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: unsafeAddR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x0, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x0
%2:gprc = COPY %0.sub_32
%3:gprc = LI 44
%4:gprc = ADD4 killed %3, killed %r0
; CHECK: li 3, 44
; CHECK: add 3, 3, 0
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeAddR0R3
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x3
%2:gprc = COPY %0.sub_32
%r0 = LI 44
%4:gprc = ADD4 killed %r0, killed %2
; CHECK: addi 3, 3, 44
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeAddR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: gprc, preferred-register: '' }
- { id: 3, class: gprc, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%1:g8rc = COPY %x4
%0:g8rc = COPY %x3
%2:gprc = COPY %0.sub_32
%r0 = LI 44
%4:gprc = ADD4 killed %2, killed %r0
; CHECK: addi 3, 3, 44
%5:g8rc = EXTSW_32_64 killed %4
%x3 = COPY %5
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: unsafeLDXR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x0', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x0, %x4
%1:g8rc = COPY %x4
%0:g8rc_and_g8rc_nox0 = LI8 44
%2:g8rc = LDX %0, %x0 :: (load 8 from %ir.1, !tbaa !3)
; CHECK: li 3, 44
; CHECK: ldx 3, 3, 0
%x3 = COPY %2
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeLDXZeroR3
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%1:g8rc = LI8 44
%0:g8rc_and_g8rc_nox0 = LI8 44
%2:g8rc = LDX %zero8, %1 :: (load 8 from %ir.1, !tbaa !3)
; CHECK: ld 3, 44(0)
%x3 = COPY %2
BLR8 implicit %lr8, implicit %rm, implicit %x3
...
---
name: safeLDXR3R0
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 1, class: g8rc, preferred-register: '' }
- { id: 2, class: g8rc, preferred-register: '' }
liveins:
- { reg: '%x3', virtual-reg: '%0' }
- { reg: '%x4', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
constants:
body: |
bb.0.entry:
liveins: %x3, %x4
%x0 = LI8 44
%0:g8rc_and_g8rc_nox0 = COPY %x3
%2:g8rc = LDX %0, %x0 :: (load 8 from %ir.1, !tbaa !3)
; CHECK: ld 3, 44(3)
%x3 = COPY %2
BLR8 implicit %lr8, implicit %rm, implicit %x3
...

File diff suppressed because it is too large Load Diff

View File

@ -37,9 +37,13 @@ define void @foo(i8 %a, i16 %b) nounwind {
;; A few test to check materialization
%5 = call i32 @t2(i8 zeroext 255)
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; ELF64: li 3, 255
; ELF64-NOT: clrldi
%6 = call i32 @t4(i16 zeroext 65535)
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; ELF64: lis 3, 0
; ELF64: ori 3, 3, 65535
; ELF64: clrldi 3, 3, 48
; ELF64: bl t4
ret void
}
@ -66,12 +70,8 @@ entry:
; ELF64: li 6, 28
; ELF64: li 7, 40
; ELF64: li 8, 186
; ELF64: clrldi 3, 3, 56
; ELF64: clrldi 4, 4, 56
; ELF64: clrldi 5, 5, 56
; ELF64: clrldi 6, 6, 56
; ELF64: clrldi 7, 7, 56
; ELF64: clrldi 8, 8, 56
; ELF64-NOT: clrldi
; ELF64: bl bar
ret i32 0
}

View File

@ -418,9 +418,9 @@ define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
; CHECK-LABEL: ne_neg1_and_ne_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: addi 3, 3, 1
; CHECK-NEXT: subfc 3, 3, 4
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: subfic 3, 3, 1
; CHECK-NEXT: subfe 3, 4, 4
; CHECK-NEXT: neg 3, 3
; CHECK-NEXT: blr

View File

@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 \
; RUN: -ppc-convert-rr-to-ri -verify-machineinstrs | FileCheck %s
define void @test(i32 zeroext %parts) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %cond.end.i
; CHECK-NEXT: cmplwi 0, 3, 1
; CHECK-NEXT: bnelr+ 0
; CHECK-NEXT: # %bb.1: # %test2.exit.us.unr-lcssa
; CHECK-NEXT: ld 3, 0(3)
; CHECK-NEXT: std 3, 0(3)
entry:
br label %cond.end.i
cond.end.i: ; preds = %entry
%cmp18.i = icmp eq i32 %parts, 1
br i1 %cmp18.i, label %while.body.lr.ph.i.us.preheader, label %test3.exit.split
while.body.lr.ph.i.us.preheader: ; preds = %cond.end.i
%0 = icmp eq i32 %parts, 1
br label %for.body.i62.us.preheader
for.body.i62.us.preheader: ; preds = %while.body.lr.ph.i.us.preheader
br i1 %0, label %test2.exit.us.unr-lcssa, label %for.body.i62.us.preheader.new
for.body.i62.us.preheader.new: ; preds = %for.body.i62.us.preheader
br label %for.body.i62.us
for.body.i62.us: ; preds = %if.end.i.us.1, %for.body.i62.us.preheader.new
%niter = phi i64 [ undef, %for.body.i62.us.preheader.new ], [ %niter.nsub.1, %if.end.i.us.1 ]
%cmp8.i.us.1 = icmp uge i64 undef, 0
br label %if.end.i.us.1
test2.exit.us.unr-lcssa: ; preds = %if.end.i.us.1, %for.body.i62.us.preheader
%c.addr.036.i.us.unr = phi i64 [ 0, %for.body.i62.us.preheader ], [ %c.addr.1.i.us.1, %if.end.i.us.1 ]
%1 = load i64, i64* undef, align 8
%tobool.i61.us.epil = icmp eq i64 %c.addr.036.i.us.unr, 0
%add.neg.i.us.epil.pn = select i1 %tobool.i61.us.epil, i64 %1, i64 0
%storemerge269 = sub i64 %add.neg.i.us.epil.pn, 0
store i64 %storemerge269, i64* undef, align 8
unreachable
test3.exit.split: ; preds = %cond.end.i
ret void
if.end.i.us.1: ; preds = %for.body.i62.us
%c.addr.1.i.us.1 = zext i1 %cmp8.i.us.1 to i64
%niter.nsub.1 = add i64 %niter, -2
%niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
br i1 %niter.ncmp.1, label %test2.exit.us.unr-lcssa, label %for.body.i62.us
}

View File

@ -89,7 +89,7 @@ entry:
; CHECK: @foo6
; CHECK-DAG: ld
; CHECK-DAG: ld
; CHECK-DAG: stdx
; CHECK-DAG: std
; CHECK: stdx
; For VSX on P7, unaligned loads and stores are preferable to aligned

View File

@ -70,9 +70,9 @@ entry:
; CHECK-LABEL: @getf
; CHECK-P7-LABEL: @getf
; CHECK-BE-LABEL: @getf
; CHECK: li [[IMMREG:[0-9]+]], 3
; CHECK: xor [[TRUNCREG:[0-9]+]], [[IMMREG]], 5
; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]]
; CHECK: xori [[TRUNCREG:[0-9]+]], 5, 3
; CHECK: sldi [[SHIFTREG:[0-9]+]], [[TRUNCREG]], 2
; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
; CHECK: xscvspdpn 1,
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29